akshare-one 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akshare_one/__init__.py +31 -31
- akshare_one/financial.py +46 -46
- akshare_one/indicators.py +395 -0
- akshare_one/insider.py +33 -33
- akshare_one/modules/cache.py +9 -9
- akshare_one/modules/eastmoney/client.py +88 -88
- akshare_one/modules/eastmoney/utils.py +104 -104
- akshare_one/modules/financial/base.py +22 -22
- akshare_one/modules/financial/factory.py +44 -44
- akshare_one/modules/financial/sina.py +273 -273
- akshare_one/modules/historical/base.py +47 -39
- akshare_one/modules/historical/eastmoney.py +241 -241
- akshare_one/modules/historical/eastmoney_direct.py +79 -79
- akshare_one/modules/historical/factory.py +48 -48
- akshare_one/modules/historical/sina.py +218 -218
- akshare_one/modules/indicators/__init__.py +0 -0
- akshare_one/modules/indicators/base.py +158 -0
- akshare_one/modules/indicators/factory.py +33 -0
- akshare_one/modules/indicators/simple.py +230 -0
- akshare_one/modules/indicators/talib.py +263 -0
- akshare_one/modules/insider/base.py +28 -28
- akshare_one/modules/insider/factory.py +44 -44
- akshare_one/modules/insider/xueqiu.py +115 -115
- akshare_one/modules/news/base.py +22 -22
- akshare_one/modules/news/eastmoney.py +47 -47
- akshare_one/modules/news/factory.py +44 -44
- akshare_one/modules/realtime/base.py +27 -27
- akshare_one/modules/realtime/eastmoney.py +57 -57
- akshare_one/modules/realtime/eastmoney_direct.py +37 -37
- akshare_one/modules/realtime/factory.py +48 -48
- akshare_one/modules/realtime/xueqiu.py +60 -60
- akshare_one/modules/utils.py +10 -10
- akshare_one/news.py +27 -27
- akshare_one/stock.py +78 -78
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/METADATA +70 -66
- akshare_one-0.3.0.dist-info/RECORD +39 -0
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/licenses/LICENSE +21 -21
- akshare_one-0.2.2.dist-info/RECORD +0 -33
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/WHEEL +0 -0
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/top_level.txt +0 -0
@@ -1,273 +1,273 @@
|
|
1
|
-
from cachetools import cached
|
2
|
-
import pandas as pd
|
3
|
-
import akshare as ak
|
4
|
-
|
5
|
-
from akshare_one.modules.cache import CACHE_CONFIG
|
6
|
-
from .base import FinancialDataProvider
|
7
|
-
|
8
|
-
|
9
|
-
class SinaFinancialReport(FinancialDataProvider):
|
10
|
-
def __init__(self, symbol: str) -> None:
|
11
|
-
super().__init__(symbol)
|
12
|
-
self.stock = (
|
13
|
-
f"sh{symbol}" if not symbol.startswith(("sh", "sz", "bj")) else symbol
|
14
|
-
)
|
15
|
-
|
16
|
-
@cached(
|
17
|
-
CACHE_CONFIG["financial_cache"],
|
18
|
-
key=lambda self, symbol=None: f"sina_balance_{self.symbol}",
|
19
|
-
)
|
20
|
-
def get_balance_sheet(self) -> pd.DataFrame:
|
21
|
-
"""获取资产负债表数据
|
22
|
-
|
23
|
-
Args:
|
24
|
-
symbol: 股票代码 (如 "600600")
|
25
|
-
|
26
|
-
Returns:
|
27
|
-
Standardized DataFrame with balance sheet data
|
28
|
-
"""
|
29
|
-
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="资产负债表")
|
30
|
-
return self._clean_balance_data(raw_df)
|
31
|
-
|
32
|
-
@cached(
|
33
|
-
CACHE_CONFIG["financial_cache"],
|
34
|
-
key=lambda self, symbol=None: f"sina_income_{self.symbol}",
|
35
|
-
)
|
36
|
-
def get_income_statement(self) -> pd.DataFrame:
|
37
|
-
"""获取利润表数据
|
38
|
-
|
39
|
-
Args:
|
40
|
-
symbol: 股票代码 (如 "600600")
|
41
|
-
|
42
|
-
Returns:
|
43
|
-
Standardized DataFrame with income statement data
|
44
|
-
"""
|
45
|
-
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="利润表")
|
46
|
-
return self._clean_income_data(raw_df)
|
47
|
-
|
48
|
-
@cached(
|
49
|
-
CACHE_CONFIG["financial_cache"],
|
50
|
-
key=lambda self, symbol=None: f"sina_cash_{self.symbol}",
|
51
|
-
)
|
52
|
-
def get_cash_flow(self) -> pd.DataFrame:
|
53
|
-
"""获取现金流量表数据
|
54
|
-
|
55
|
-
Args:
|
56
|
-
symbol: 股票代码 (如 "600600")
|
57
|
-
|
58
|
-
Returns:
|
59
|
-
Standardized DataFrame with cash flow data
|
60
|
-
"""
|
61
|
-
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="现金流量表")
|
62
|
-
return self._clean_cash_data(raw_df)
|
63
|
-
|
64
|
-
def _clean_cash_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
65
|
-
"""清理和标准化现金流量表数据
|
66
|
-
|
67
|
-
Args:
|
68
|
-
raw_df: Raw DataFrame from Sina API
|
69
|
-
|
70
|
-
Returns:
|
71
|
-
Standardized DataFrame with consistent columns
|
72
|
-
"""
|
73
|
-
# Convert timestamp columns if exists
|
74
|
-
if "报告日" in raw_df.columns:
|
75
|
-
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
76
|
-
raw_df["report_date"] = pd.to_datetime(
|
77
|
-
raw_df["report_date"], format="%Y%m%d"
|
78
|
-
)
|
79
|
-
|
80
|
-
if "更新日期" in raw_df.columns:
|
81
|
-
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
82
|
-
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
83
|
-
|
84
|
-
# Standardize column names
|
85
|
-
column_mapping = {
|
86
|
-
"类型": "report_type",
|
87
|
-
"币种": "currency",
|
88
|
-
"净利润": "net_income",
|
89
|
-
"固定资产折旧、油气资产折耗、生产性生物资产折旧": "depreciation_and_amortization",
|
90
|
-
"无形资产摊销": "share_based_compensation",
|
91
|
-
"经营活动产生的现金流量净额": "net_cash_flow_from_operations",
|
92
|
-
"购建固定资产、无形资产和其他长期资产支付的现金": "capital_expenditure",
|
93
|
-
"取得子公司及其他营业单位支付的现金净额": "business_acquisitions_and_disposals",
|
94
|
-
"投资支付的现金": "investment_acquisitions_and_disposals",
|
95
|
-
"投资活动产生的现金流量净额": "net_cash_flow_from_investing",
|
96
|
-
"取得借款收到的现金": "issuance_or_repayment_of_debt_securities",
|
97
|
-
"吸收投资收到的现金": "issuance_or_purchase_of_equity_shares",
|
98
|
-
"分配股利、利润或偿付利息支付的现金": "dividends_and_other_cash_distributions",
|
99
|
-
"筹资活动产生的现金流量净额": "net_cash_flow_from_financing",
|
100
|
-
"现金及现金等价物净增加额": "change_in_cash_and_equivalents",
|
101
|
-
"汇率变动对现金及现金等价物的影响": "effect_of_exchange_rate_changes",
|
102
|
-
"期末现金及现金等价物余额": "ending_cash_balance",
|
103
|
-
"自由现金流": "free_cash_flow",
|
104
|
-
}
|
105
|
-
raw_df = raw_df.rename(columns=column_mapping)
|
106
|
-
|
107
|
-
# Select only required columns
|
108
|
-
required_columns = ["report_date"]
|
109
|
-
required_columns.extend(column_mapping.values())
|
110
|
-
|
111
|
-
# Filter columns
|
112
|
-
available_columns = [col for col in required_columns if col in raw_df.columns]
|
113
|
-
return raw_df[available_columns]
|
114
|
-
|
115
|
-
def _clean_balance_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
116
|
-
"""清理和标准化资产负债表数据
|
117
|
-
|
118
|
-
Args:
|
119
|
-
raw_df: Raw DataFrame from Sina API
|
120
|
-
|
121
|
-
Returns:
|
122
|
-
Standardized DataFrame with consistent columns
|
123
|
-
"""
|
124
|
-
# Convert timestamp columns if exists
|
125
|
-
if "报告日" in raw_df.columns:
|
126
|
-
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
127
|
-
raw_df["report_date"] = pd.to_datetime(
|
128
|
-
raw_df["report_date"], format="%Y%m%d"
|
129
|
-
)
|
130
|
-
|
131
|
-
if "更新日期" in raw_df.columns:
|
132
|
-
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
133
|
-
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
134
|
-
|
135
|
-
# Standardize column names
|
136
|
-
column_mapping = {
|
137
|
-
"类型": "report_type",
|
138
|
-
"币种": "currency",
|
139
|
-
"资产总计": "total_assets",
|
140
|
-
"流动资产合计": "current_assets",
|
141
|
-
"货币资金": "cash_and_equivalents",
|
142
|
-
"存货": "inventory",
|
143
|
-
"交易性金融资产": "current_investments",
|
144
|
-
"应收票据及应收账款": "trade_and_non_trade_receivables",
|
145
|
-
"非流动资产合计": "non_current_assets",
|
146
|
-
"固定资产": "property_plant_and_equipment",
|
147
|
-
"商誉": "goodwill_and_intangible_assets",
|
148
|
-
"长期股权投资": "investments",
|
149
|
-
"其他非流动金融资产": "non_current_investments",
|
150
|
-
"实收资本(或股本)": "outstanding_shares",
|
151
|
-
"递延所得税资产": "tax_assets",
|
152
|
-
"负债合计": "total_liabilities",
|
153
|
-
"流动负债合计": "current_liabilities",
|
154
|
-
"短期借款": "current_debt",
|
155
|
-
"应付票据及应付账款": "trade_and_non_trade_payables",
|
156
|
-
"合同负债": "deferred_revenue",
|
157
|
-
"吸收存款及同业存放": "deposit_liabilities",
|
158
|
-
"非流动负债合计": "non_current_liabilities",
|
159
|
-
"长期借款": "non_current_debt",
|
160
|
-
"递延所得税负债": "tax_liabilities",
|
161
|
-
"所有者权益(或股东权益)合计": "shareholders_equity",
|
162
|
-
"未分配利润": "retained_earnings",
|
163
|
-
"其他综合收益": "accumulated_other_comprehensive_income",
|
164
|
-
}
|
165
|
-
raw_df = raw_df.rename(columns=column_mapping)
|
166
|
-
|
167
|
-
# Select only required columns
|
168
|
-
required_columns = [
|
169
|
-
"report_date",
|
170
|
-
"report_period",
|
171
|
-
"period",
|
172
|
-
"currency",
|
173
|
-
"total_assets",
|
174
|
-
"current_assets",
|
175
|
-
"cash_and_equivalents",
|
176
|
-
"inventory",
|
177
|
-
"current_investments",
|
178
|
-
"trade_and_non_trade_receivables",
|
179
|
-
"non_current_assets",
|
180
|
-
"property_plant_and_equipment",
|
181
|
-
"goodwill_and_intangible_assets",
|
182
|
-
"investments",
|
183
|
-
"non_current_investments",
|
184
|
-
"outstanding_shares",
|
185
|
-
"tax_assets",
|
186
|
-
"total_liabilities",
|
187
|
-
"current_liabilities",
|
188
|
-
"current_debt",
|
189
|
-
"trade_and_non_trade_payables",
|
190
|
-
"deferred_revenue",
|
191
|
-
"deposit_liabilities",
|
192
|
-
"non_current_liabilities",
|
193
|
-
"non_current_debt",
|
194
|
-
"tax_liabilities",
|
195
|
-
"shareholders_equity",
|
196
|
-
"retained_earnings",
|
197
|
-
"accumulated_other_comprehensive_income",
|
198
|
-
]
|
199
|
-
|
200
|
-
# Calculate total_debt
|
201
|
-
if "current_debt" in raw_df.columns and "non_current_debt" in raw_df.columns:
|
202
|
-
raw_df["total_debt"] = raw_df["current_debt"] + raw_df["non_current_debt"]
|
203
|
-
required_columns.append("total_debt")
|
204
|
-
|
205
|
-
# Filter columns
|
206
|
-
available_columns = [col for col in required_columns if col in raw_df.columns]
|
207
|
-
return raw_df[available_columns]
|
208
|
-
|
209
|
-
def _clean_income_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
210
|
-
"""清理和标准化利润表数据
|
211
|
-
|
212
|
-
Args:
|
213
|
-
raw_df: Raw DataFrame from Sina API
|
214
|
-
|
215
|
-
Returns:
|
216
|
-
Standardized DataFrame with consistent columns
|
217
|
-
"""
|
218
|
-
# Convert timestamp columns if exists
|
219
|
-
if "报告日" in raw_df.columns:
|
220
|
-
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
221
|
-
raw_df["report_date"] = pd.to_datetime(
|
222
|
-
raw_df["report_date"], format="%Y%m%d"
|
223
|
-
)
|
224
|
-
|
225
|
-
if "更新日期" in raw_df.columns:
|
226
|
-
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
227
|
-
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
228
|
-
|
229
|
-
# Standardize column names
|
230
|
-
column_mapping = {
|
231
|
-
"类型": "report_type",
|
232
|
-
"币种": "currency",
|
233
|
-
"营业总收入": "revenue",
|
234
|
-
"营业成本": "cost_of_revenue",
|
235
|
-
"营业利润": "operating_profit",
|
236
|
-
"销售费用": "selling_general_and_administrative_expenses",
|
237
|
-
"管理费用": "operating_expense",
|
238
|
-
"研发费用": "research_and_development",
|
239
|
-
"利息支出": "interest_expense",
|
240
|
-
"利润总额": "ebit",
|
241
|
-
"所得税费用": "income_tax_expense",
|
242
|
-
"净利润": "net_income",
|
243
|
-
"归属于母公司所有者的净利润": "net_income_common_stock",
|
244
|
-
"少数股东损益": "net_income_non_controlling_interests",
|
245
|
-
"基本每股收益": "earnings_per_share",
|
246
|
-
"稀释每股收益": "earnings_per_share_diluted",
|
247
|
-
}
|
248
|
-
raw_df = raw_df.rename(columns=column_mapping)
|
249
|
-
|
250
|
-
# Select only required columns
|
251
|
-
required_columns = [
|
252
|
-
"report_date",
|
253
|
-
"period",
|
254
|
-
"currency",
|
255
|
-
"revenue",
|
256
|
-
"cost_of_revenue",
|
257
|
-
"operating_profit",
|
258
|
-
"operating_expense",
|
259
|
-
"selling_general_and_administrative_expenses",
|
260
|
-
"research_and_development",
|
261
|
-
"interest_expense",
|
262
|
-
"ebit",
|
263
|
-
"income_tax_expense",
|
264
|
-
"net_income",
|
265
|
-
"net_income_common_stock",
|
266
|
-
"net_income_non_controlling_interests",
|
267
|
-
"earnings_per_share",
|
268
|
-
"earnings_per_share_diluted",
|
269
|
-
]
|
270
|
-
|
271
|
-
# Filter columns
|
272
|
-
available_columns = [col for col in required_columns if col in raw_df.columns]
|
273
|
-
return raw_df[available_columns]
|
1
|
+
from cachetools import cached
|
2
|
+
import pandas as pd
|
3
|
+
import akshare as ak
|
4
|
+
|
5
|
+
from akshare_one.modules.cache import CACHE_CONFIG
|
6
|
+
from .base import FinancialDataProvider
|
7
|
+
|
8
|
+
|
9
|
+
class SinaFinancialReport(FinancialDataProvider):
|
10
|
+
def __init__(self, symbol: str) -> None:
|
11
|
+
super().__init__(symbol)
|
12
|
+
self.stock = (
|
13
|
+
f"sh{symbol}" if not symbol.startswith(("sh", "sz", "bj")) else symbol
|
14
|
+
)
|
15
|
+
|
16
|
+
@cached(
|
17
|
+
CACHE_CONFIG["financial_cache"],
|
18
|
+
key=lambda self, symbol=None: f"sina_balance_{self.symbol}",
|
19
|
+
)
|
20
|
+
def get_balance_sheet(self) -> pd.DataFrame:
|
21
|
+
"""获取资产负债表数据
|
22
|
+
|
23
|
+
Args:
|
24
|
+
symbol: 股票代码 (如 "600600")
|
25
|
+
|
26
|
+
Returns:
|
27
|
+
Standardized DataFrame with balance sheet data
|
28
|
+
"""
|
29
|
+
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="资产负债表")
|
30
|
+
return self._clean_balance_data(raw_df)
|
31
|
+
|
32
|
+
@cached(
|
33
|
+
CACHE_CONFIG["financial_cache"],
|
34
|
+
key=lambda self, symbol=None: f"sina_income_{self.symbol}",
|
35
|
+
)
|
36
|
+
def get_income_statement(self) -> pd.DataFrame:
|
37
|
+
"""获取利润表数据
|
38
|
+
|
39
|
+
Args:
|
40
|
+
symbol: 股票代码 (如 "600600")
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
Standardized DataFrame with income statement data
|
44
|
+
"""
|
45
|
+
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="利润表")
|
46
|
+
return self._clean_income_data(raw_df)
|
47
|
+
|
48
|
+
@cached(
|
49
|
+
CACHE_CONFIG["financial_cache"],
|
50
|
+
key=lambda self, symbol=None: f"sina_cash_{self.symbol}",
|
51
|
+
)
|
52
|
+
def get_cash_flow(self) -> pd.DataFrame:
|
53
|
+
"""获取现金流量表数据
|
54
|
+
|
55
|
+
Args:
|
56
|
+
symbol: 股票代码 (如 "600600")
|
57
|
+
|
58
|
+
Returns:
|
59
|
+
Standardized DataFrame with cash flow data
|
60
|
+
"""
|
61
|
+
raw_df = ak.stock_financial_report_sina(stock=self.stock, symbol="现金流量表")
|
62
|
+
return self._clean_cash_data(raw_df)
|
63
|
+
|
64
|
+
def _clean_cash_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
65
|
+
"""清理和标准化现金流量表数据
|
66
|
+
|
67
|
+
Args:
|
68
|
+
raw_df: Raw DataFrame from Sina API
|
69
|
+
|
70
|
+
Returns:
|
71
|
+
Standardized DataFrame with consistent columns
|
72
|
+
"""
|
73
|
+
# Convert timestamp columns if exists
|
74
|
+
if "报告日" in raw_df.columns:
|
75
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
76
|
+
raw_df["report_date"] = pd.to_datetime(
|
77
|
+
raw_df["report_date"], format="%Y%m%d"
|
78
|
+
)
|
79
|
+
|
80
|
+
if "更新日期" in raw_df.columns:
|
81
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
82
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
83
|
+
|
84
|
+
# Standardize column names
|
85
|
+
column_mapping = {
|
86
|
+
"类型": "report_type",
|
87
|
+
"币种": "currency",
|
88
|
+
"净利润": "net_income",
|
89
|
+
"固定资产折旧、油气资产折耗、生产性生物资产折旧": "depreciation_and_amortization",
|
90
|
+
"无形资产摊销": "share_based_compensation",
|
91
|
+
"经营活动产生的现金流量净额": "net_cash_flow_from_operations",
|
92
|
+
"购建固定资产、无形资产和其他长期资产支付的现金": "capital_expenditure",
|
93
|
+
"取得子公司及其他营业单位支付的现金净额": "business_acquisitions_and_disposals",
|
94
|
+
"投资支付的现金": "investment_acquisitions_and_disposals",
|
95
|
+
"投资活动产生的现金流量净额": "net_cash_flow_from_investing",
|
96
|
+
"取得借款收到的现金": "issuance_or_repayment_of_debt_securities",
|
97
|
+
"吸收投资收到的现金": "issuance_or_purchase_of_equity_shares",
|
98
|
+
"分配股利、利润或偿付利息支付的现金": "dividends_and_other_cash_distributions",
|
99
|
+
"筹资活动产生的现金流量净额": "net_cash_flow_from_financing",
|
100
|
+
"现金及现金等价物净增加额": "change_in_cash_and_equivalents",
|
101
|
+
"汇率变动对现金及现金等价物的影响": "effect_of_exchange_rate_changes",
|
102
|
+
"期末现金及现金等价物余额": "ending_cash_balance",
|
103
|
+
"自由现金流": "free_cash_flow",
|
104
|
+
}
|
105
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
106
|
+
|
107
|
+
# Select only required columns
|
108
|
+
required_columns = ["report_date"]
|
109
|
+
required_columns.extend(column_mapping.values())
|
110
|
+
|
111
|
+
# Filter columns
|
112
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
113
|
+
return raw_df[available_columns]
|
114
|
+
|
115
|
+
def _clean_balance_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
116
|
+
"""清理和标准化资产负债表数据
|
117
|
+
|
118
|
+
Args:
|
119
|
+
raw_df: Raw DataFrame from Sina API
|
120
|
+
|
121
|
+
Returns:
|
122
|
+
Standardized DataFrame with consistent columns
|
123
|
+
"""
|
124
|
+
# Convert timestamp columns if exists
|
125
|
+
if "报告日" in raw_df.columns:
|
126
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
127
|
+
raw_df["report_date"] = pd.to_datetime(
|
128
|
+
raw_df["report_date"], format="%Y%m%d"
|
129
|
+
)
|
130
|
+
|
131
|
+
if "更新日期" in raw_df.columns:
|
132
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
133
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
134
|
+
|
135
|
+
# Standardize column names
|
136
|
+
column_mapping = {
|
137
|
+
"类型": "report_type",
|
138
|
+
"币种": "currency",
|
139
|
+
"资产总计": "total_assets",
|
140
|
+
"流动资产合计": "current_assets",
|
141
|
+
"货币资金": "cash_and_equivalents",
|
142
|
+
"存货": "inventory",
|
143
|
+
"交易性金融资产": "current_investments",
|
144
|
+
"应收票据及应收账款": "trade_and_non_trade_receivables",
|
145
|
+
"非流动资产合计": "non_current_assets",
|
146
|
+
"固定资产": "property_plant_and_equipment",
|
147
|
+
"商誉": "goodwill_and_intangible_assets",
|
148
|
+
"长期股权投资": "investments",
|
149
|
+
"其他非流动金融资产": "non_current_investments",
|
150
|
+
"实收资本(或股本)": "outstanding_shares",
|
151
|
+
"递延所得税资产": "tax_assets",
|
152
|
+
"负债合计": "total_liabilities",
|
153
|
+
"流动负债合计": "current_liabilities",
|
154
|
+
"短期借款": "current_debt",
|
155
|
+
"应付票据及应付账款": "trade_and_non_trade_payables",
|
156
|
+
"合同负债": "deferred_revenue",
|
157
|
+
"吸收存款及同业存放": "deposit_liabilities",
|
158
|
+
"非流动负债合计": "non_current_liabilities",
|
159
|
+
"长期借款": "non_current_debt",
|
160
|
+
"递延所得税负债": "tax_liabilities",
|
161
|
+
"所有者权益(或股东权益)合计": "shareholders_equity",
|
162
|
+
"未分配利润": "retained_earnings",
|
163
|
+
"其他综合收益": "accumulated_other_comprehensive_income",
|
164
|
+
}
|
165
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
166
|
+
|
167
|
+
# Select only required columns
|
168
|
+
required_columns = [
|
169
|
+
"report_date",
|
170
|
+
"report_period",
|
171
|
+
"period",
|
172
|
+
"currency",
|
173
|
+
"total_assets",
|
174
|
+
"current_assets",
|
175
|
+
"cash_and_equivalents",
|
176
|
+
"inventory",
|
177
|
+
"current_investments",
|
178
|
+
"trade_and_non_trade_receivables",
|
179
|
+
"non_current_assets",
|
180
|
+
"property_plant_and_equipment",
|
181
|
+
"goodwill_and_intangible_assets",
|
182
|
+
"investments",
|
183
|
+
"non_current_investments",
|
184
|
+
"outstanding_shares",
|
185
|
+
"tax_assets",
|
186
|
+
"total_liabilities",
|
187
|
+
"current_liabilities",
|
188
|
+
"current_debt",
|
189
|
+
"trade_and_non_trade_payables",
|
190
|
+
"deferred_revenue",
|
191
|
+
"deposit_liabilities",
|
192
|
+
"non_current_liabilities",
|
193
|
+
"non_current_debt",
|
194
|
+
"tax_liabilities",
|
195
|
+
"shareholders_equity",
|
196
|
+
"retained_earnings",
|
197
|
+
"accumulated_other_comprehensive_income",
|
198
|
+
]
|
199
|
+
|
200
|
+
# Calculate total_debt
|
201
|
+
if "current_debt" in raw_df.columns and "non_current_debt" in raw_df.columns:
|
202
|
+
raw_df["total_debt"] = raw_df["current_debt"] + raw_df["non_current_debt"]
|
203
|
+
required_columns.append("total_debt")
|
204
|
+
|
205
|
+
# Filter columns
|
206
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
207
|
+
return raw_df[available_columns]
|
208
|
+
|
209
|
+
def _clean_income_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
210
|
+
"""清理和标准化利润表数据
|
211
|
+
|
212
|
+
Args:
|
213
|
+
raw_df: Raw DataFrame from Sina API
|
214
|
+
|
215
|
+
Returns:
|
216
|
+
Standardized DataFrame with consistent columns
|
217
|
+
"""
|
218
|
+
# Convert timestamp columns if exists
|
219
|
+
if "报告日" in raw_df.columns:
|
220
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
221
|
+
raw_df["report_date"] = pd.to_datetime(
|
222
|
+
raw_df["report_date"], format="%Y%m%d"
|
223
|
+
)
|
224
|
+
|
225
|
+
if "更新日期" in raw_df.columns:
|
226
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
227
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
228
|
+
|
229
|
+
# Standardize column names
|
230
|
+
column_mapping = {
|
231
|
+
"类型": "report_type",
|
232
|
+
"币种": "currency",
|
233
|
+
"营业总收入": "revenue",
|
234
|
+
"营业成本": "cost_of_revenue",
|
235
|
+
"营业利润": "operating_profit",
|
236
|
+
"销售费用": "selling_general_and_administrative_expenses",
|
237
|
+
"管理费用": "operating_expense",
|
238
|
+
"研发费用": "research_and_development",
|
239
|
+
"利息支出": "interest_expense",
|
240
|
+
"利润总额": "ebit",
|
241
|
+
"所得税费用": "income_tax_expense",
|
242
|
+
"净利润": "net_income",
|
243
|
+
"归属于母公司所有者的净利润": "net_income_common_stock",
|
244
|
+
"少数股东损益": "net_income_non_controlling_interests",
|
245
|
+
"基本每股收益": "earnings_per_share",
|
246
|
+
"稀释每股收益": "earnings_per_share_diluted",
|
247
|
+
}
|
248
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
249
|
+
|
250
|
+
# Select only required columns
|
251
|
+
required_columns = [
|
252
|
+
"report_date",
|
253
|
+
"period",
|
254
|
+
"currency",
|
255
|
+
"revenue",
|
256
|
+
"cost_of_revenue",
|
257
|
+
"operating_profit",
|
258
|
+
"operating_expense",
|
259
|
+
"selling_general_and_administrative_expenses",
|
260
|
+
"research_and_development",
|
261
|
+
"interest_expense",
|
262
|
+
"ebit",
|
263
|
+
"income_tax_expense",
|
264
|
+
"net_income",
|
265
|
+
"net_income_common_stock",
|
266
|
+
"net_income_non_controlling_interests",
|
267
|
+
"earnings_per_share",
|
268
|
+
"earnings_per_share_diluted",
|
269
|
+
]
|
270
|
+
|
271
|
+
# Filter columns
|
272
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
273
|
+
return raw_df[available_columns]
|
@@ -1,39 +1,47 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
import pandas as pd
|
3
|
-
|
4
|
-
|
5
|
-
class HistoricalDataProvider(ABC):
|
6
|
-
def __init__(
|
7
|
-
self,
|
8
|
-
symbol: str,
|
9
|
-
interval: str = "day",
|
10
|
-
interval_multiplier: int = 1,
|
11
|
-
start_date: str = "1970-01-01",
|
12
|
-
end_date: str = "2030-12-31",
|
13
|
-
adjust: str = "none",
|
14
|
-
) -> None:
|
15
|
-
self.symbol = symbol
|
16
|
-
self.interval = interval
|
17
|
-
self.interval_multiplier = interval_multiplier
|
18
|
-
self.start_date = start_date
|
19
|
-
self.end_date = end_date
|
20
|
-
self.adjust = adjust
|
21
|
-
|
22
|
-
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
|
5
|
+
class HistoricalDataProvider(ABC):
|
6
|
+
def __init__(
|
7
|
+
self,
|
8
|
+
symbol: str,
|
9
|
+
interval: str = "day",
|
10
|
+
interval_multiplier: int = 1,
|
11
|
+
start_date: str = "1970-01-01",
|
12
|
+
end_date: str = "2030-12-31",
|
13
|
+
adjust: str = "none",
|
14
|
+
) -> None:
|
15
|
+
self.symbol = symbol
|
16
|
+
self.interval = interval
|
17
|
+
self.interval_multiplier = interval_multiplier
|
18
|
+
self.start_date = start_date
|
19
|
+
self.end_date = end_date
|
20
|
+
self.adjust = adjust
|
21
|
+
self._validate_dates()
|
22
|
+
|
23
|
+
def _validate_dates(self):
|
24
|
+
try:
|
25
|
+
pd.to_datetime(self.start_date)
|
26
|
+
pd.to_datetime(self.end_date)
|
27
|
+
except ValueError:
|
28
|
+
raise ValueError("Invalid date format. Please use YYYY-MM-DD.")
|
29
|
+
|
30
|
+
@classmethod
|
31
|
+
def get_supported_intervals(cls):
|
32
|
+
return ["minute", "hour", "day", "week", "month", "year"]
|
33
|
+
|
34
|
+
@abstractmethod
|
35
|
+
def get_hist_data(self) -> pd.DataFrame:
|
36
|
+
"""Fetches historical market data
|
37
|
+
|
38
|
+
Returns:
|
39
|
+
pd.DataFrame:
|
40
|
+
- timestamp (UTC)
|
41
|
+
- open
|
42
|
+
- high
|
43
|
+
- low
|
44
|
+
- close
|
45
|
+
- volume
|
46
|
+
"""
|
47
|
+
pass
|