akshare-one 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akshare_one/__init__.py +198 -15
- akshare_one/modules/cache.py +20 -2
- akshare_one/modules/financial/base.py +5 -0
- akshare_one/modules/financial/factory.py +2 -0
- akshare_one/modules/financial/sina.py +125 -106
- akshare_one/modules/historical/base.py +1 -1
- akshare_one/modules/historical/eastmoney.py +9 -14
- akshare_one/modules/historical/eastmoney_direct.py +5 -6
- akshare_one/modules/historical/sina.py +6 -10
- akshare_one/modules/info/base.py +25 -0
- akshare_one/modules/info/eastmoney.py +51 -0
- akshare_one/modules/info/factory.py +44 -0
- akshare_one/modules/insider/base.py +1 -1
- akshare_one/modules/insider/xueqiu.py +8 -13
- akshare_one/modules/news/base.py +1 -1
- akshare_one/modules/news/eastmoney.py +5 -9
- akshare_one/modules/realtime/eastmoney.py +4 -8
- akshare_one/modules/realtime/eastmoney_direct.py +5 -6
- akshare_one/modules/realtime/xueqiu.py +4 -7
- {akshare_one-0.3.2.dist-info → akshare_one-0.3.4.dist-info}/METADATA +9 -5
- akshare_one-0.3.4.dist-info/RECORD +36 -0
- akshare_one/financial.py +0 -46
- akshare_one/insider.py +0 -33
- akshare_one/modules/eastmoney/client.py +0 -88
- akshare_one/modules/eastmoney/utils.py +0 -104
- akshare_one/news.py +0 -27
- akshare_one/stock.py +0 -78
- akshare_one-0.3.2.dist-info/RECORD +0 -39
- {akshare_one-0.3.2.dist-info → akshare_one-0.3.4.dist-info}/WHEEL +0 -0
- {akshare_one-0.3.2.dist-info → akshare_one-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {akshare_one-0.3.2.dist-info → akshare_one-0.3.4.dist-info}/top_level.txt +0 -0
akshare_one/__init__.py
CHANGED
@@ -14,18 +14,201 @@ Example:
|
|
14
14
|
>>> df = get_realtime_data(symbol="600000")
|
15
15
|
"""
|
16
16
|
|
17
|
-
from
|
18
|
-
|
19
|
-
from .
|
20
|
-
from .
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
"
|
29
|
-
|
30
|
-
"
|
31
|
-
|
17
|
+
from typing import Optional, Literal
|
18
|
+
import pandas as pd
|
19
|
+
from .modules.financial.factory import FinancialDataFactory
|
20
|
+
from .modules.historical.factory import HistoricalDataFactory
|
21
|
+
from .modules.realtime.factory import RealtimeDataFactory
|
22
|
+
from .modules.info.factory import InfoDataFactory
|
23
|
+
from .modules.news.factory import NewsDataFactory
|
24
|
+
from .modules.insider.factory import InsiderDataFactory
|
25
|
+
|
26
|
+
|
27
|
+
def get_basic_info(
|
28
|
+
symbol: str, source: Literal["eastmoney"] = "eastmoney"
|
29
|
+
) -> pd.DataFrame:
|
30
|
+
"""获取股票基础信息
|
31
|
+
|
32
|
+
Args:
|
33
|
+
symbol: 股票代码 (e.g. '600000')
|
34
|
+
source: 数据源 ('eastmoney')
|
35
|
+
|
36
|
+
Returns:
|
37
|
+
pd.DataFrame:
|
38
|
+
- price: 最新价
|
39
|
+
- symbol: 股票代码
|
40
|
+
- name: 股票简称
|
41
|
+
- total_shares: 总股本
|
42
|
+
- float_shares: 流通股
|
43
|
+
- total_market_cap: 总市值
|
44
|
+
- float_market_cap: 流通市值
|
45
|
+
- industry: 行业
|
46
|
+
- listing_date: 上市时间
|
47
|
+
"""
|
48
|
+
provider = InfoDataFactory.get_provider(source, symbol=symbol)
|
49
|
+
return provider.get_basic_info()
|
50
|
+
|
51
|
+
|
52
|
+
def get_hist_data(
|
53
|
+
symbol: str,
|
54
|
+
interval: Literal["minute", "hour", "day", "week", "month", "year"] = "day",
|
55
|
+
interval_multiplier: int = 1,
|
56
|
+
start_date: str = "1970-01-01",
|
57
|
+
end_date: str = "2030-12-31",
|
58
|
+
adjust: Literal["none", "qfq", "hfq"] = "none",
|
59
|
+
source: Literal["eastmoney", "eastmoney_direct", "sina"] = "eastmoney",
|
60
|
+
) -> pd.DataFrame:
|
61
|
+
"""Get historical market data
|
62
|
+
|
63
|
+
Args:
|
64
|
+
symbol: 股票代码 (e.g. '600000')
|
65
|
+
interval: 时间间隔 ('minute','hour','day','week','month','year')
|
66
|
+
interval_multiplier: 时间间隔倍数 (e.g. 5 for 5 minutes)
|
67
|
+
start_date: 开始日期 (YYYY-MM-DD)
|
68
|
+
end_date: 结束日期 (YYYY-MM-DD)
|
69
|
+
adjust: 复权类型 ('none','qfq','hfq')
|
70
|
+
source: 数据源 ('eastmoney', 'eastmoney_direct', 'sina') (default: 'eastmoney')
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
pd.DataFrame:
|
74
|
+
- timestamp: 时间戳
|
75
|
+
- open: 开盘价
|
76
|
+
- high: 最高价
|
77
|
+
- low: 最低价
|
78
|
+
- close: 收盘价
|
79
|
+
- volume: 成交量
|
80
|
+
"""
|
81
|
+
kwargs = {
|
82
|
+
"symbol": symbol,
|
83
|
+
"interval": interval,
|
84
|
+
"interval_multiplier": interval_multiplier,
|
85
|
+
"start_date": start_date,
|
86
|
+
"end_date": end_date,
|
87
|
+
"adjust": adjust,
|
88
|
+
}
|
89
|
+
provider = HistoricalDataFactory.get_provider(source, **kwargs)
|
90
|
+
return provider.get_hist_data()
|
91
|
+
|
92
|
+
|
93
|
+
def get_realtime_data(
|
94
|
+
symbol: Optional[str] = None,
|
95
|
+
source: Literal["eastmoney", "eastmoney_direct", "xueqiu"] = "xueqiu",
|
96
|
+
) -> pd.DataFrame:
|
97
|
+
"""Get real-time market quotes
|
98
|
+
|
99
|
+
Args:
|
100
|
+
symbol: 股票代码 (如 "600000")
|
101
|
+
source: 数据源 ('eastmoney', 'eastmoney_direct', 'xueqiu')
|
102
|
+
|
103
|
+
Returns:
|
104
|
+
pd.DataFrame:
|
105
|
+
- symbol: 股票代码
|
106
|
+
- price: 最新价
|
107
|
+
- change: 涨跌额
|
108
|
+
- pct_change: 涨跌幅(%)
|
109
|
+
- timestamp: 时间戳
|
110
|
+
- volume: 成交量(手)
|
111
|
+
- amount: 成交额(元)
|
112
|
+
- open: 今开
|
113
|
+
- high: 最高
|
114
|
+
- low: 最低
|
115
|
+
- prev_close: 昨收
|
116
|
+
"""
|
117
|
+
provider = RealtimeDataFactory.get_provider(source, symbol=symbol)
|
118
|
+
return provider.get_current_data()
|
119
|
+
|
120
|
+
|
121
|
+
def get_news_data(
|
122
|
+
symbol: str, source: Literal["eastmoney"] = "eastmoney"
|
123
|
+
) -> pd.DataFrame:
|
124
|
+
"""获取个股新闻数据
|
125
|
+
|
126
|
+
Args:
|
127
|
+
symbol: 股票代码 (如 "300059")
|
128
|
+
source: 数据源 ('eastmoney')
|
129
|
+
|
130
|
+
Returns:
|
131
|
+
pd.DataFrame:
|
132
|
+
- keyword: 关键词
|
133
|
+
- title: 新闻标题
|
134
|
+
- content: 新闻内容
|
135
|
+
- publish_time: 发布时间
|
136
|
+
- source: 文章来源
|
137
|
+
- url: 新闻链接
|
138
|
+
"""
|
139
|
+
provider = NewsDataFactory.get_provider(source, symbol=symbol)
|
140
|
+
return provider.get_news_data()
|
141
|
+
|
142
|
+
|
143
|
+
def get_balance_sheet(symbol: str, source: Literal["sina"] = "sina") -> pd.DataFrame:
|
144
|
+
"""获取资产负债表数据
|
145
|
+
|
146
|
+
Args:
|
147
|
+
symbol: 股票代码 (如 "600600")
|
148
|
+
source: 数据源 ("sina")
|
149
|
+
|
150
|
+
Returns:
|
151
|
+
pd.DataFrame: 资产负债表数据
|
152
|
+
"""
|
153
|
+
provider = FinancialDataFactory.get_provider(source, symbol=symbol)
|
154
|
+
return provider.get_balance_sheet()
|
155
|
+
|
156
|
+
|
157
|
+
def get_income_statement(symbol: str, source: Literal["sina"] = "sina") -> pd.DataFrame:
|
158
|
+
"""获取利润表数据
|
159
|
+
|
160
|
+
Args:
|
161
|
+
symbol: 股票代码 (如 "600600")
|
162
|
+
source: 数据源 ("sina")
|
163
|
+
|
164
|
+
Returns:
|
165
|
+
pd.DataFrame: 利润表数据
|
166
|
+
"""
|
167
|
+
provider = FinancialDataFactory.get_provider(source, symbol=symbol)
|
168
|
+
return provider.get_income_statement()
|
169
|
+
|
170
|
+
|
171
|
+
def get_cash_flow(symbol: str, source: Literal["sina"] = "sina") -> pd.DataFrame:
|
172
|
+
"""获取现金流量表数据
|
173
|
+
|
174
|
+
Args:
|
175
|
+
symbol: 股票代码 (如 "600600")
|
176
|
+
source: 数据源 ("sina")
|
177
|
+
|
178
|
+
Returns:
|
179
|
+
pd.DataFrame: 现金流量表数据
|
180
|
+
"""
|
181
|
+
provider = FinancialDataFactory.get_provider(source, symbol=symbol)
|
182
|
+
return provider.get_cash_flow()
|
183
|
+
|
184
|
+
|
185
|
+
def get_financial_metrics(
|
186
|
+
symbol: str, source: Literal["eastmoney_direct"] = "eastmoney_direct"
|
187
|
+
) -> pd.DataFrame:
|
188
|
+
"""获取三大财务报表关键指标
|
189
|
+
|
190
|
+
Args:
|
191
|
+
symbol: 股票代码 (如 "600600")
|
192
|
+
source: 数据源 ('eastmoney_direct')
|
193
|
+
|
194
|
+
Returns:
|
195
|
+
pd.DataFrame: 财务关键指标数据
|
196
|
+
"""
|
197
|
+
provider = FinancialDataFactory.get_provider(source, symbol=symbol)
|
198
|
+
return provider.get_financial_metrics()
|
199
|
+
|
200
|
+
|
201
|
+
def get_inner_trade_data(
|
202
|
+
symbol: str, source: Literal["xueqiu"] = "xueqiu"
|
203
|
+
) -> pd.DataFrame:
|
204
|
+
"""获取雪球内部交易数据
|
205
|
+
|
206
|
+
Args:
|
207
|
+
symbol: 股票代码,如"600000"
|
208
|
+
source: 数据源 (目前支持 "xueqiu")
|
209
|
+
|
210
|
+
Returns:
|
211
|
+
pd.DataFrame: 内部交易数据
|
212
|
+
"""
|
213
|
+
provider = InsiderDataFactory.get_provider(source, symbol=symbol)
|
214
|
+
return provider.get_inner_trade_data()
|
akshare_one/modules/cache.py
CHANGED
@@ -1,9 +1,27 @@
|
|
1
|
-
from cachetools import TTLCache
|
1
|
+
from cachetools import TTLCache, cached
|
2
|
+
import os
|
2
3
|
|
3
4
|
# 缓存配置
|
4
5
|
CACHE_CONFIG = {
|
5
6
|
"hist_data_cache": TTLCache(maxsize=1000, ttl=3600), # 历史数据缓存1小时
|
6
7
|
"realtime_cache": TTLCache(maxsize=500, ttl=60), # 实时数据缓存1分钟
|
7
|
-
"news_cache": TTLCache(maxsize=500, ttl=3600),
|
8
|
+
"news_cache": TTLCache(maxsize=500, ttl=3600), # 新闻数据缓存1小时
|
8
9
|
"financial_cache": TTLCache(maxsize=500, ttl=86400), # 财务数据缓存24小时
|
10
|
+
"info_cache": TTLCache(maxsize=500, ttl=86400), # 信息数据缓存24小时
|
9
11
|
}
|
12
|
+
|
13
|
+
|
14
|
+
def cache(cache_key, key=None):
|
15
|
+
cache_enabled = os.getenv("AKSHARE_ONE_CACHE_ENABLED", "true").lower() in (
|
16
|
+
"1",
|
17
|
+
"true",
|
18
|
+
"yes",
|
19
|
+
"on",
|
20
|
+
)
|
21
|
+
|
22
|
+
def decorator(func):
|
23
|
+
if cache_enabled:
|
24
|
+
return cached(CACHE_CONFIG[cache_key], key=key)(func)
|
25
|
+
return func
|
26
|
+
|
27
|
+
return decorator
|
@@ -1,3 +1,4 @@
|
|
1
|
+
from .eastmoney_direct import EastMoneyDirectFinancialReport
|
1
2
|
from .sina import SinaFinancialReport
|
2
3
|
from .base import FinancialDataProvider
|
3
4
|
|
@@ -9,6 +10,7 @@ class FinancialDataFactory:
|
|
9
10
|
|
10
11
|
_providers = {
|
11
12
|
"sina": SinaFinancialReport,
|
13
|
+
"eastmoney_direct": EastMoneyDirectFinancialReport,
|
12
14
|
}
|
13
15
|
|
14
16
|
@classmethod
|
@@ -1,21 +1,25 @@
|
|
1
|
-
from cachetools import cached
|
2
1
|
import pandas as pd
|
3
2
|
import akshare as ak
|
4
3
|
|
5
|
-
from
|
4
|
+
from ..cache import cache
|
6
5
|
from .base import FinancialDataProvider
|
7
6
|
|
8
7
|
|
9
8
|
class SinaFinancialReport(FinancialDataProvider):
|
9
|
+
"""Financial data provider for Sina finance reports.
|
10
|
+
|
11
|
+
Provides standardized access to balance sheet, income statement,
|
12
|
+
and cash flow data from Sina finance API.
|
13
|
+
"""
|
14
|
+
|
10
15
|
def __init__(self, symbol: str) -> None:
|
11
16
|
super().__init__(symbol)
|
12
17
|
self.stock = (
|
13
18
|
f"sh{symbol}" if not symbol.startswith(("sh", "sz", "bj")) else symbol
|
14
19
|
)
|
15
20
|
|
16
|
-
@
|
17
|
-
|
18
|
-
key=lambda self, symbol=None: f"sina_balance_{self.symbol}",
|
21
|
+
@cache(
|
22
|
+
"financial_cache", key=lambda self, symbol=None: f"sina_balance_{self.symbol}"
|
19
23
|
)
|
20
24
|
def get_balance_sheet(self) -> pd.DataFrame:
|
21
25
|
"""获取资产负债表数据
|
@@ -29,9 +33,8 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
29
33
|
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="资产负债表")
|
30
34
|
return self._clean_balance_data(raw_df)
|
31
35
|
|
32
|
-
@
|
33
|
-
|
34
|
-
key=lambda self, symbol=None: f"sina_income_{self.symbol}",
|
36
|
+
@cache(
|
37
|
+
"financial_cache", key=lambda self, symbol=None: f"sina_income_{self.symbol}"
|
35
38
|
)
|
36
39
|
def get_income_statement(self) -> pd.DataFrame:
|
37
40
|
"""获取利润表数据
|
@@ -45,10 +48,7 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
45
48
|
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="利润表")
|
46
49
|
return self._clean_income_data(raw_df)
|
47
50
|
|
48
|
-
@
|
49
|
-
CACHE_CONFIG["financial_cache"],
|
50
|
-
key=lambda self, symbol=None: f"sina_cash_{self.symbol}",
|
51
|
-
)
|
51
|
+
@cache("financial_cache", key=lambda self, symbol=None: f"sina_cash_{self.symbol}")
|
52
52
|
def get_cash_flow(self) -> pd.DataFrame:
|
53
53
|
"""获取现金流量表数据
|
54
54
|
|
@@ -77,40 +77,41 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
77
77
|
raw_df["report_date"], format="%Y%m%d"
|
78
78
|
)
|
79
79
|
|
80
|
-
|
81
|
-
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
82
|
-
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
83
|
-
|
84
|
-
# Standardize column names
|
80
|
+
# Define column mappings and required columns
|
85
81
|
column_mapping = {
|
86
|
-
"类型": "report_type",
|
87
82
|
"币种": "currency",
|
88
|
-
"净利润": "net_income",
|
89
|
-
"固定资产折旧、油气资产折耗、生产性生物资产折旧": "depreciation_and_amortization",
|
90
|
-
"无形资产摊销": "share_based_compensation",
|
91
83
|
"经营活动产生的现金流量净额": "net_cash_flow_from_operations",
|
92
84
|
"购建固定资产、无形资产和其他长期资产支付的现金": "capital_expenditure",
|
93
85
|
"取得子公司及其他营业单位支付的现金净额": "business_acquisitions_and_disposals",
|
94
|
-
"投资支付的现金": "investment_acquisitions_and_disposals",
|
95
86
|
"投资活动产生的现金流量净额": "net_cash_flow_from_investing",
|
96
87
|
"取得借款收到的现金": "issuance_or_repayment_of_debt_securities",
|
97
88
|
"吸收投资收到的现金": "issuance_or_purchase_of_equity_shares",
|
98
|
-
"分配股利、利润或偿付利息支付的现金": "dividends_and_other_cash_distributions",
|
99
89
|
"筹资活动产生的现金流量净额": "net_cash_flow_from_financing",
|
100
90
|
"现金及现金等价物净增加额": "change_in_cash_and_equivalents",
|
101
91
|
"汇率变动对现金及现金等价物的影响": "effect_of_exchange_rate_changes",
|
102
92
|
"期末现金及现金等价物余额": "ending_cash_balance",
|
103
|
-
"
|
93
|
+
"销售商品、提供劳务收到的现金": "cash_from_sales",
|
94
|
+
"收到的税费返还": "tax_refunds_received",
|
95
|
+
"支付给职工以及为职工支付的现金": "cash_paid_to_employees",
|
96
|
+
"支付的各项税费": "taxes_paid",
|
97
|
+
"经营活动现金流入小计": "total_cash_inflow_from_operations",
|
98
|
+
"经营活动现金流出小计": "total_cash_outflow_from_operations",
|
99
|
+
"收回投资所收到的现金": "cash_from_investment_recovery",
|
100
|
+
"取得投资收益收到的现金": "cash_from_investment_income",
|
101
|
+
"处置固定资产、无形资产收回的现金": "cash_from_asset_sales",
|
102
|
+
"投资活动现金流入小计": "total_cash_inflow_from_investing",
|
103
|
+
"投资活动现金流出小计": "total_cash_outflow_from_investing",
|
104
|
+
"分配股利、利润或偿付利息所支付的现金": "cash_paid_for_dividends_and_interest",
|
105
|
+
"偿还债务支付的现金": "cash_paid_for_debt_repayment",
|
106
|
+
"筹资活动现金流入小计": "total_cash_inflow_from_financing",
|
107
|
+
"筹资活动现金流出小计": "total_cash_outflow_from_financing",
|
108
|
+
"期初现金及现金等价物余额": "beginning_cash_balance",
|
109
|
+
"现金的期末余额": "ending_cash",
|
110
|
+
"现金等价物的期末余额": "ending_cash_equivalents",
|
104
111
|
}
|
105
|
-
raw_df = raw_df.rename(columns=column_mapping)
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
required_columns.extend(column_mapping.values())
|
110
|
-
|
111
|
-
# Filter columns
|
112
|
-
available_columns = [col for col in required_columns if col in raw_df.columns]
|
113
|
-
return raw_df[available_columns]
|
113
|
+
required_columns = ["report_date"] + list(column_mapping.values())
|
114
|
+
return raw_df.rename(columns=column_mapping).reindex(columns=required_columns)
|
114
115
|
|
115
116
|
def _clean_balance_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
116
117
|
"""清理和标准化资产负债表数据
|
@@ -128,47 +129,48 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
128
129
|
raw_df["report_date"], format="%Y%m%d"
|
129
130
|
)
|
130
131
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
132
|
+
# Define and apply column mappings in one optimized operation
|
133
|
+
raw_df = raw_df.rename(
|
134
|
+
columns={
|
135
|
+
"币种": "currency",
|
136
|
+
"资产总计": "total_assets",
|
137
|
+
"流动资产合计": "current_assets",
|
138
|
+
"货币资金": "cash_and_equivalents",
|
139
|
+
"存货": "inventory",
|
140
|
+
"交易性金融资产": "current_investments",
|
141
|
+
"应收票据及应收账款": "trade_and_non_trade_receivables",
|
142
|
+
"非流动资产合计": "non_current_assets",
|
143
|
+
"固定资产": "property_plant_and_equipment",
|
144
|
+
"商誉": "goodwill_and_intangible_assets",
|
145
|
+
"长期股权投资": "investments",
|
146
|
+
"其他非流动金融资产": "non_current_investments",
|
147
|
+
"实收资本(或股本)": "outstanding_shares",
|
148
|
+
"递延所得税资产": "tax_assets",
|
149
|
+
"负债合计": "total_liabilities",
|
150
|
+
"流动负债合计": "current_liabilities",
|
151
|
+
"短期借款": "current_debt",
|
152
|
+
"应付票据及应付账款": "trade_and_non_trade_payables",
|
153
|
+
"合同负债": "deferred_revenue",
|
154
|
+
"吸收存款及同业存放": "deposit_liabilities",
|
155
|
+
"非流动负债合计": "non_current_liabilities",
|
156
|
+
"长期借款": "non_current_debt",
|
157
|
+
"递延所得税负债": "tax_liabilities",
|
158
|
+
"所有者权益(或股东权益)合计": "shareholders_equity",
|
159
|
+
"未分配利润": "retained_earnings",
|
160
|
+
"其他综合收益": "accumulated_other_comprehensive_income",
|
161
|
+
"应收账款": "accounts_receivable",
|
162
|
+
"预付款项": "prepayments",
|
163
|
+
"其他应收款": "other_receivables",
|
164
|
+
"固定资产净值": "fixed_assets_net",
|
165
|
+
"在建工程": "construction_in_progress",
|
166
|
+
"资本公积": "capital_reserve",
|
167
|
+
"少数股东权益": "minority_interest",
|
168
|
+
}
|
169
|
+
)
|
166
170
|
|
167
171
|
# Select only required columns
|
168
172
|
required_columns = [
|
169
173
|
"report_date",
|
170
|
-
"report_period",
|
171
|
-
"period",
|
172
174
|
"currency",
|
173
175
|
"total_assets",
|
174
176
|
"current_assets",
|
@@ -195,16 +197,49 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
195
197
|
"shareholders_equity",
|
196
198
|
"retained_earnings",
|
197
199
|
"accumulated_other_comprehensive_income",
|
200
|
+
"accounts_receivable",
|
201
|
+
"prepayments",
|
202
|
+
"other_receivables",
|
203
|
+
"fixed_assets_net",
|
204
|
+
"construction_in_progress",
|
205
|
+
"capital_reserve",
|
206
|
+
"current_ratio",
|
207
|
+
"debt_to_assets",
|
208
|
+
"minority_interest",
|
198
209
|
]
|
199
210
|
|
200
|
-
# Calculate
|
201
|
-
|
202
|
-
|
203
|
-
|
211
|
+
# Calculate financial ratios using vectorized operations
|
212
|
+
cols = ["current_debt", "non_current_debt"]
|
213
|
+
raw_df[cols] = raw_df[cols].apply(pd.to_numeric, errors="coerce")
|
214
|
+
raw_df["total_debt"] = raw_df[cols].fillna(0).sum(axis=1)
|
215
|
+
|
216
|
+
# Pre-calculate denominator conditions
|
217
|
+
valid_current_liab = raw_df["current_liabilities"].ne(0)
|
218
|
+
valid_total_assets = raw_df["total_assets"].ne(0)
|
219
|
+
|
220
|
+
# Calculate ratios in one operation
|
221
|
+
ratios = pd.DataFrame(
|
222
|
+
{
|
223
|
+
"current_ratio": raw_df["current_assets"]
|
224
|
+
/ raw_df["current_liabilities"],
|
225
|
+
"cash_ratio": raw_df["cash_and_equivalents"]
|
226
|
+
/ raw_df["current_liabilities"],
|
227
|
+
"debt_to_assets": raw_df["total_debt"] / raw_df["total_assets"],
|
228
|
+
}
|
229
|
+
)
|
204
230
|
|
205
|
-
#
|
206
|
-
|
207
|
-
|
231
|
+
# Apply conditions
|
232
|
+
cond = pd.DataFrame(
|
233
|
+
{
|
234
|
+
"current_ratio": valid_current_liab,
|
235
|
+
"cash_ratio": valid_current_liab,
|
236
|
+
"debt_to_assets": valid_total_assets,
|
237
|
+
},
|
238
|
+
index=ratios.index,
|
239
|
+
)
|
240
|
+
raw_df = raw_df.join(ratios.where(cond))
|
241
|
+
|
242
|
+
return raw_df.reindex(columns=required_columns)
|
208
243
|
|
209
244
|
def _clean_income_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
210
245
|
"""清理和标准化利润表数据
|
@@ -222,15 +257,12 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
222
257
|
raw_df["report_date"], format="%Y%m%d"
|
223
258
|
)
|
224
259
|
|
225
|
-
|
226
|
-
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
227
|
-
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
228
|
-
|
229
|
-
# Standardize column names
|
260
|
+
# Define column mappings and required columns
|
230
261
|
column_mapping = {
|
231
|
-
"类型": "report_type",
|
232
262
|
"币种": "currency",
|
233
263
|
"营业总收入": "revenue",
|
264
|
+
"营业收入": "operating_revenue",
|
265
|
+
"营业总成本": "total_operating_costs",
|
234
266
|
"营业成本": "cost_of_revenue",
|
235
267
|
"营业利润": "operating_profit",
|
236
268
|
"销售费用": "selling_general_and_administrative_expenses",
|
@@ -244,30 +276,17 @@ class SinaFinancialReport(FinancialDataProvider):
|
|
244
276
|
"少数股东损益": "net_income_non_controlling_interests",
|
245
277
|
"基本每股收益": "earnings_per_share",
|
246
278
|
"稀释每股收益": "earnings_per_share_diluted",
|
279
|
+
"投资收益": "investment_income",
|
280
|
+
"公允价值变动收益": "fair_value_adjustments",
|
281
|
+
"资产减值损失": "asset_impairment_loss",
|
282
|
+
"财务费用": "financial_expenses",
|
283
|
+
"营业税金及附加": "taxes_and_surcharges",
|
284
|
+
"其他综合收益": "other_comprehensive_income",
|
285
|
+
"综合收益总额": "total_comprehensive_income",
|
247
286
|
}
|
248
|
-
raw_df = raw_df.rename(columns=column_mapping)
|
249
287
|
|
250
|
-
|
251
|
-
|
252
|
-
"report_date",
|
253
|
-
"period",
|
254
|
-
"currency",
|
255
|
-
"revenue",
|
256
|
-
"cost_of_revenue",
|
257
|
-
"operating_profit",
|
258
|
-
"operating_expense",
|
259
|
-
"selling_general_and_administrative_expenses",
|
260
|
-
"research_and_development",
|
261
|
-
"interest_expense",
|
262
|
-
"ebit",
|
263
|
-
"income_tax_expense",
|
264
|
-
"net_income",
|
265
|
-
"net_income_common_stock",
|
266
|
-
"net_income_non_controlling_interests",
|
267
|
-
"earnings_per_share",
|
268
|
-
"earnings_per_share_diluted",
|
269
|
-
]
|
288
|
+
required_columns = ["report_date"] + list(column_mapping.values())
|
289
|
+
return raw_df.rename(columns=column_mapping).reindex(columns=required_columns)
|
270
290
|
|
271
|
-
|
272
|
-
|
273
|
-
return raw_df[available_columns]
|
291
|
+
def get_financial_metrics(self):
|
292
|
+
pass
|
@@ -1,15 +1,14 @@
|
|
1
|
-
from cachetools import cached
|
2
1
|
from .base import HistoricalDataProvider
|
3
2
|
import akshare as ak
|
4
3
|
import pandas as pd
|
5
|
-
from ..cache import
|
4
|
+
from ..cache import cache
|
6
5
|
|
7
6
|
|
8
7
|
class EastMoneyHistorical(HistoricalDataProvider):
|
9
8
|
"""Adapter for EastMoney historical stock data API"""
|
10
9
|
|
11
|
-
@
|
12
|
-
|
10
|
+
@cache(
|
11
|
+
"hist_data_cache",
|
13
12
|
key=lambda self: f"eastmoney_hist_{self.symbol}_{self.interval}_{self.interval_multiplier}_{self.adjust}",
|
14
13
|
)
|
15
14
|
def get_hist_data(self) -> pd.DataFrame:
|
@@ -157,7 +156,7 @@ class EastMoneyHistorical(HistoricalDataProvider):
|
|
157
156
|
return resampled.reset_index()
|
158
157
|
|
159
158
|
def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
|
160
|
-
"""Cleans and standardizes minute/hour level data
|
159
|
+
"""Cleans and standardizes minute/hour level data"""
|
161
160
|
column_map = {
|
162
161
|
"1": {
|
163
162
|
"时间": "timestamp",
|
@@ -188,16 +187,14 @@ class EastMoneyHistorical(HistoricalDataProvider):
|
|
188
187
|
df = raw_df.rename(columns=mapping)
|
189
188
|
|
190
189
|
if "timestamp" in df.columns:
|
191
|
-
df["timestamp"] = (
|
192
|
-
|
193
|
-
.dt.tz_localize("Asia/Shanghai")
|
194
|
-
.dt.tz_convert("UTC")
|
190
|
+
df["timestamp"] = pd.to_datetime(df["timestamp"]).dt.tz_localize(
|
191
|
+
"Asia/Shanghai"
|
195
192
|
)
|
196
193
|
|
197
194
|
return self._select_standard_columns(df)
|
198
195
|
|
199
196
|
def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
200
|
-
"""Cleans and standardizes daily and higher-level data
|
197
|
+
"""Cleans and standardizes daily and higher-level data"""
|
201
198
|
column_map = {
|
202
199
|
"日期": "timestamp",
|
203
200
|
"开盘": "open",
|
@@ -217,10 +214,8 @@ class EastMoneyHistorical(HistoricalDataProvider):
|
|
217
214
|
df = raw_df.rename(columns=available_columns)
|
218
215
|
|
219
216
|
if "timestamp" in df.columns:
|
220
|
-
df["timestamp"] = (
|
221
|
-
|
222
|
-
.dt.tz_localize("Asia/Shanghai")
|
223
|
-
.dt.tz_convert("UTC")
|
217
|
+
df["timestamp"] = pd.to_datetime(df["timestamp"]).dt.tz_localize(
|
218
|
+
"Asia/Shanghai"
|
224
219
|
)
|
225
220
|
|
226
221
|
if "volume" in df.columns:
|