siat 3.7.6__py3-none-any.whl → 3.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/__init__.py +55 -33
- siat/allin.py +3 -0
- siat/common.py +249 -1
- siat/fin_stmt2_yahoo.py +839 -0
- siat/markowitz2.py +12 -12
- siat/security_prices.py +8 -0
- siat/stock.py +5 -5
- siat/valuation.py +12 -8
- {siat-3.7.6.dist-info → siat-3.7.8.dist-info}/METADATA +1 -1
- {siat-3.7.6.dist-info → siat-3.7.8.dist-info}/RECORD +13 -12
- {siat-3.7.6.dist-info → siat-3.7.8.dist-info}/LICENSE +0 -0
- {siat-3.7.6.dist-info → siat-3.7.8.dist-info}/WHEEL +0 -0
- {siat-3.7.6.dist-info → siat-3.7.8.dist-info}/top_level.txt +0 -0
siat/fin_stmt2_yahoo.py
ADDED
@@ -0,0 +1,839 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""
|
3
|
+
本模块功能:上市公司的财务报表分析,数据层
|
4
|
+
特点1:科目项目不采用字符串匹配方法,采用contains_any和contains_all匹配方法!
|
5
|
+
特点2:以苹果财报的项目名称为基准!其他股票的财报项目名称若不同则转换为苹果财报的名称
|
6
|
+
所属工具包:证券投资分析工具SIAT
|
7
|
+
SIAT:Security Investment Analysis Tool
|
8
|
+
创建日期:2024年11月28日
|
9
|
+
最新修订日期:2024年11月28日
|
10
|
+
作者:王德宏 (WANG Dehong, Peter)
|
11
|
+
作者单位:北京外国语大学国际商学院
|
12
|
+
版权所有:王德宏
|
13
|
+
用途限制:仅限研究与教学使用,不可商用!商用需要额外授权。
|
14
|
+
特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
|
15
|
+
"""
|
16
|
+
|
17
|
+
#==============================================================================
|
18
|
+
#关闭所有警告
|
19
|
+
import warnings; warnings.filterwarnings('ignore')
|
20
|
+
|
21
|
+
from siat.common import *
|
22
|
+
import pandas as pd
|
23
|
+
import numpy as np
|
24
|
+
#==============================================================================
|
25
|
+
#本模块使用yahooquery插件
|
26
|
+
#==============================================================================
|
27
|
+
if __name__=='__main__':
    # Interactive scratch cell: sample inputs for trying get_1statement_yahoo2.
    # symbol is reassigned repeatedly; only the last value reaches the calls below.
    symbol='AAPL'   # its statement item names serve as the naming baseline

    symbol='JD'
    symbol='INTL'
    symbol='MSFT'
    symbol='600519.SS'
    symbol='00700.HK'

    symbol='601398.SS'
    symbol='601328.SS'

    max_count=3
    max_sleep=30

    report_type="balance sheet"
    printout=True

    # Balance sheet, without and with the key-entry check/fill step
    bsdf_raw=get_1statement_yahoo2(symbol,report_type="balance sheet",fix_entry=False)
    bsdf_fix=get_1statement_yahoo2(symbol,report_type="balance sheet",fix_entry=True)

    # Income statement, raw
    isdf_raw=get_1statement_yahoo2(symbol,report_type="income statement",fix_entry=False)

    # Cash flow statement, raw
    cfsdf_raw=get_1statement_yahoo2(symbol,report_type="cash flow",fix_entry=False)
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
def _fetch_1statement_yahoo2(stock,report_type,quarterly,max_count,max_sleep):
    """
    Download one statement from a yahooquery Ticker, retrying on failure.

    Anything that is not a DataFrame is treated as a failed attempt.
    Between attempts the function waits a random time (sleep_random), but
    only while test_yahoo_finance() reports that Yahoo is reachable.

    Returns: the DataFrame on success, None otherwise.
    """
    if report_type=="balance sheet":
        fetch=stock.balance_sheet
    elif report_type=="income statement":
        fetch=stock.income_statement
    else:
        fetch=stock.cash_flow

    # Annual data is requested with the library defaults, quarterly with "q"
    kwargs={"frequency":"q"} if quarterly else {}

    for attempt in range(max_count):
        result=fetch(**kwargs)
        if isinstance(result,pd.DataFrame):
            return result

        # No point in waiting after the final attempt or when Yahoo is down
        if attempt==max_count-1:
            break
        if not test_yahoo_finance():
            break
        sleep_random(max_sleep)

    return None


def get_1statement_yahoo2(symbol,report_type="balance sheet", \
                          report_period="all", \
                          max_count=3,max_sleep=30, \
                          fix_entry=True,printout=False):
    """
    Purpose: retrieve the annual and/or quarterly reports of one statement
    type for one stock from Yahoo Finance (via yahooquery).

    Parameters:
    symbol: stock code; a five-digit Hong Kong code is shortened to four digits
    report_type: statement to fetch. Anything containing "balance" means the
        balance sheet, anything containing "income" the income statement,
        everything else the cash flow statement. Default "balance sheet"
    report_period: "all" (annual + quarterly, default), "annual" or "quarterly"
    max_count: total number of download attempts per request, default 3
    max_sleep: upper bound of the random wait (seconds) between attempts
    fix_entry: whether to check and fill the key entries, default True
    printout: whether to print the retrieved statement, default False

    Returns: a DataFrame on success, None on failure.

    Note: if the quarterly download fails while annual data is available,
    a warning is printed and the annual data alone is returned.
    """
    report_type=report_type.lower()
    if 'balance' in report_type:
        report_type="balance sheet"
    elif 'income' in report_type:
        report_type="income statement"
    else:
        report_type="cash flow statement"
    print(f" Retrieving {report_type} of {symbol} ... ...")

    symbol=symbol.upper()
    result,prefix,suffix=split_prefix_suffix(symbol)
    # Hong Kong codes: five digits become four
    if result and suffix=='HK' and len(prefix)==5:
        symbol=symbol[1:]

    #=====Download the statements==============================================
    from yahooquery import Ticker
    # Open question kept from the original: how to detect an invalid symbol?
    try:
        stock = Ticker(symbol)
    except Exception:
        print(" #Warning(get_1statement_yahoo2): Yahoo Finance currently unaccessable")
        return None

    # Annual reports
    stmta=None
    if contains_any(report_period,['all','annual']):
        stmta=_fetch_1statement_yahoo2(stock,report_type,False,max_count,max_sleep)
        if stmta is None:
            # Tell an unknown symbol apart from an unreachable service
            if test_yahoo_finance():
                print(" #Warning(get_1statement_yahoo2): {} not found for annual reports".format(symbol))
            else:
                print(" #Warning(get_1statement_yahoo2): sorry, Yahoo Finance currently unaccessable")
            return None

    # Quarterly reports; they may repeat a date already covered by an annual
    # report, so duplicates are removed after merging
    stmtq=None
    if contains_any(report_period,['all','quarterly']):
        stmtq=_fetch_1statement_yahoo2(stock,report_type,True,max_count,max_sleep)
        if stmtq is None and stmta is not None:
            print(" #Warning(get_1statement_yahoo2): quarterly reports unavailable for {}, annual reports only".format(symbol))

    # Combine whatever was retrieved
    frames=[frame for frame in (stmta,stmtq) if frame is not None]
    if not frames:
        print(" #Error(get_1statement_yahoo2): retrieved no periodic reports for",symbol)
        return None
    stmt=pd.concat(frames) if len(frames)>1 else frames[0]

    # Sort, then keep the first record per date: '12M' sorts before '3M',
    # so the annual record wins over a quarterly one with the same date
    stmt=stmt.sort_values(by=['asOfDate','periodType'])
    stmt=stmt.drop_duplicates(subset=['asOfDate'],keep='first')

    #=====Check key entries and fill the missing ones==========================
    if fix_entry:
        if report_type=="balance sheet":
            stmt=check_fill_bs_yahoo(stmt)
        elif report_type=="income statement":
            stmt=check_fill_is_yahoo(stmt)
        else:
            stmt=check_fill_cfs_yahoo(stmt)

    # Column order: the three header columns first, the rest alphabetically
    head_cols=['asOfDate','periodType','currencyCode']
    other_cols=sorted(col for col in list(stmt) if col not in head_cols)
    stmt=stmt[head_cols+other_cols]

    # Optional printout for inspection
    if printout:
        print_stmt_yahoo2(stmt,title_prefix=report_type.title())

    return stmt
|
191
|
+
|
192
|
+
|
193
|
+
#==============================================================================
|
194
|
+
if __name__=='__main__':
    # Interactive scratch cell: fetch a raw balance sheet and run the
    # balance-sheet check/fill step on it.
    symbol="AAPL"

    from yahooquery import Ticker
    stock = Ticker(symbol)
    stmt=stock.balance_sheet()

    df=check_fill_bs_yahoo(stmt)
|
202
|
+
|
203
|
+
def check_fill_bs_yahoo(stmt):
    """
    Purpose: check and fill selected balance sheet entries retrieved from Yahoo.
    Naming baseline: the entry names of Apple's statements.

    Input: the raw balance sheet DataFrame.
    Output: the DataFrame returned by the last check_fill_entry call.

    Note: filling gaps from within the balance sheet alone is a rough
    approximation; filling may instead be postponed until the three
    statements have been merged.
    """
    # Each row: (entry item, keywords of the entry (case-insensitive),
    #            keyword groups identifying substitute entries)
    # The order matters and follows the original sequence of calls.
    bs_entries=(
        #=====Assets
        ('AccountsReceivable',["accounts","receivable"],[["receivables"]]),
        ('Inventory',["inventory"],[]),
        #=====Liabilities
        # current (interest-bearing) debt; substitute: current liabilities
        ('CurrentDebt',["current","debt"],[["current","liabilities"]]),
        # current liabilities (interest-bearing debt + payables)
        ('CurrentLiabilities',["current","liabilities"],[["current","debt"]]),
        ('AccountsPayable',["accounts","payable"],[["payables"]]),
        # total (interest-bearing) debt
        ('TotalDebt',["total","debt"],[["total","liabilities"]]),
        ('TotalLiabilities',["total","liabilities"],[["total","debt"]]),
        #=====Equity
        ('TotalEquity',["total","equity"],[["stock","holder","quity"]]),
    )

    for item,words,alternatives in bs_entries:
        stmt=check_fill_entry(stmt,item,words,alternatives)

    return stmt
|
290
|
+
|
291
|
+
#==============================================================================
|
292
|
+
if __name__=='__main__':
    # Interactive scratch cell: fetch a raw income statement and run the
    # income-statement check/fill step on it.
    from yahooquery import Ticker
    stock = Ticker("AAPL")
    stmt=stock.income_statement()

    df=check_fill_is_yahoo(stmt)
|
298
|
+
|
299
|
+
def check_fill_is_yahoo(stmt):
    """
    Purpose: check and fill selected income statement entries retrieved from Yahoo.

    Input: the raw income statement DataFrame.
    Output: the checked DataFrame; every entry handled here exists as a
    column afterwards (possibly all NaN).

    Note: filling gaps from within the income statement alone is a rough
    approximation; filling may instead be done after the three statements
    have been merged.
    """
    # Each row: (entry item, keywords of the entry (case-insensitive),
    #            keyword groups identifying substitute entries)
    plain_entries=(
        #-----Costs and expenses
        ('InterestExpense',["interest","expense"],[["interest","expense","operating"]]),
        ('CostOfRevenue',["cost","revenue"],[["reconciled","cost","revenue"]]),
        ('OperatingExpense',["operating","expense"],[["cost","revenue"]]),
        ('Depreciation',["depreciation"],[["reconciled","depreciation"]]),
        #-----Revenue and profit
        ('OperatingRevenue',["operating","revenue"],[["total","revenue"]]),
        ('OperatingIncome',["operating","income"],[]),
    )
    for item,words,alternatives in plain_entries:
        stmt=check_fill_entry(stmt,item,words,alternatives)

    # Operating income: derive it when nothing usable was found
    if stmt['OperatingIncome'].isna().all():
        stmt['OperatingIncome']=stmt['OperatingRevenue']-stmt['OperatingExpense']

    # EBITDA
    stmt=check_fill_entry(stmt,'EBITDA',["ebitda"],[])

    # EBIT: keep the reported column as it is. It is deliberately not looked
    # up by keyword: the former "ebitda" keyword made check_fill_entry copy
    # EBITDA over EBIT, and "ebit" is itself a substring of "ebitda".
    if 'EBIT' not in list(stmt):
        stmt['EBIT']=np.nan
    if stmt['EBIT'].isna().all():
        # EBITDA = EBIT + depreciation/amortization, hence the subtraction
        stmt['EBIT']=stmt['EBITDA']-stmt['Depreciation']

    # Gross profit, with two successive fallbacks kept from the original
    stmt=check_fill_entry(stmt,'GrossProfit',["gross","profit"], \
                          [["operating","income"],["pretax","income"]])
    if stmt['GrossProfit'].isna().all():
        stmt['GrossProfit']=stmt['OperatingRevenue']-stmt['OperatingExpense']
    if stmt['GrossProfit'].isna().all():
        stmt['GrossProfit']=stmt['EBITDA']

    return stmt
|
401
|
+
|
402
|
+
#==============================================================================
|
403
|
+
if __name__=='__main__':
    # Interactive scratch cell: fetch a raw cash flow statement and run the
    # cash-flow check/fill step on it.
    from yahooquery import Ticker
    stock = Ticker("AAPL")
    stmt=stock.cash_flow()

    df=check_fill_cfs_yahoo(stmt)
|
409
|
+
|
410
|
+
def check_fill_cfs_yahoo(stmt):
    """
    Purpose: check and fill selected cash flow statement entries retrieved from Yahoo.

    Input: the raw cash flow statement DataFrame.
    Output: the DataFrame returned by the last check_fill_entry call.

    Note: filling gaps from within the cash flow statement alone is a rough
    approximation; filling may instead be done after the three statements
    have been merged.
    """
    # Each row: (entry item, keywords of the entry (case-insensitive),
    #            keyword groups identifying substitute entries)
    cfs_entries=(
        # cash dividends paid
        ('CashDividendsPaid',
         ["cash","dividends","paid"],
         [["common","stock","dividend","paid"]]),
        # cash flow from financing activities
        ('CashFlowFromFinancingActivities',
         ["cash","flow","from","financing","activities"],
         [["cash","flow","from","continuing","financing","activities"],["financing","cash","flow"]]),
        # cash flow from investing activities
        ('CashFlowFromInvestingActivities',
         ["cash","flow","from","investing","activities"],
         [["cash","flow","from","continuing","investing","activities"],["investing","cash","flow"]]),
        # cash flow from operating activities
        ('CashFlowFromOperatingActivities',
         ["cash","flow","from","operating","activities"],
         [["cash","flow","from","continuing","operating","activities"],["operating","cash","flow"]]),
    )

    for item,words,alternatives in cfs_entries:
        stmt=check_fill_entry(stmt,item,words,alternatives)

    return stmt
|
453
|
+
#==============================================================================
|
454
|
+
if __name__=='__main__':
    # Interactive scratch cell: fetch a raw cash flow statement and print it.
    from yahooquery import Ticker
    stock = Ticker("AAPL")
    stmt=stock.cash_flow()

    title_prefix="Cash Flow Statement"

    print_stmt_yahoo2(stmt,title_prefix)
|
462
|
+
|
463
|
+
def print_stmt_yahoo2(stmt,title_prefix):
    """
    Purpose: display a statement retrieved from Yahoo as a table.

    Parameters:
    stmt: statement DataFrame holding at least the columns asOfDate,
        currencyCode and periodType; it is copied, not modified
    title_prefix: text placed after the stock symbol in the table title

    The symbol comes from the first 'ticker' value when that column exists,
    otherwise from the first index label. All columns except periodType are
    divided by one million and rounded to 2 decimals; the table is then
    transposed so that each report date becomes a column, newest first.
    """
    table=stmt.copy()

    # Stock symbol: the 'ticker' column when present, else the index label
    if 'ticker' in list(table):
        symbol=table.pop('ticker')[0]
    else:
        symbol=table.index[0]

    # Short report dates become the row labels
    table['reportDate']=table['asOfDate'].apply(lambda day: day.strftime("%y-%m-%d"))
    table.set_index('reportDate',inplace=True)
    del table['asOfDate']

    currencyCode=table.pop('currencyCode').values[0]

    # Express every value column in millions
    value_cols=list(table)
    value_cols.remove('periodType')
    million=1000000
    for col in value_cols:
        table[col]=table[col].apply(lambda amount: round(amount/million,2))

    # Items as rows, report dates as columns in descending order
    wide=table.T
    date_cols=sorted(list(wide),reverse=True)
    wide=wide[date_cols]
    wide['Item']=wide.index
    wide=wide[['Item']+date_cols]

    titletxt=f"{symbol}: {title_prefix}, in {currencyCode} millions"
    footnote="Note: 12M indicates annual report, 3M quaterly reports"
    df_display_CSS(wide,titletxt=titletxt,footnote=footnote, \
                   facecolor='papayawhip',decimals=2, \
                   first_col_align='left',second_col_align='right', \
                   last_col_align='right',other_col_align='right', \
                   titile_font_size='15px',heading_font_size='11px', \
                   data_font_size='11px',footnote_font_size='13px')

    return
|
507
|
+
|
508
|
+
#==============================================================================
|
509
|
+
if __name__ == '__main__':
    # Interactive scratch cell for check_fill_entry.
    # NOTE(review): stmt is not assigned in this block; presumably it is left
    # over from an earlier cell - verify before running.
    entry_item='CurrentDebt'    # main entry item
    entry_words=["current","debt"]  # keywords of the main entry, case-insensitive
    # keyword groups identifying substitute entries
    entry_alternatives=[["current","liabilities"],["current","borrowing"],["current","obligation"]]

    entry_item in list(stmt)
    stmt=check_fill_entry(stmt,entry_item,entry_words,entry_alternatives)
    stmt[entry_item]
|
518
|
+
|
519
|
+
def check_fill_entry(stmt,entry_item,entry_words,entry_alternatives):
    """
    Purpose: make sure the entry entry_item exists in the raw statement stmt.
        If the entry exists and holds data, it is left untouched.
        If it exists but is entirely empty, it is filled from a substitute
        entry when one can be found, otherwise it stays empty.
        If it does not exist, it is copied from the best keyword match
        (own keywords first, then the substitutes); if nothing matches,
        it is created with all values NaN.

    Parameters:
    stmt: the raw statement DataFrame (modified in place and returned)
    entry_item: name of the entry to secure
    entry_words: keyword list describing the entry
    entry_alternatives: list of keyword lists describing substitute entries

    Returns: the updated statement DataFrame.
    """
    stmt_cols=list(stmt)

    if entry_item in stmt_cols:
        # An exact, populated column always wins over any keyword match
        if not stmt[entry_item].isna().all():
            return stmt

        # Exists but empty: look for a substitute, tolerate finding none
        substitute=None
        if entry_alternatives:
            substitute=list_contains_all_list(stmt_cols,entry_alternatives)
        if substitute and substitute != entry_item:
            stmt[entry_item]=stmt[substitute]
        return stmt

    # Entry absent: match by its own keywords first, then by the substitutes
    entry_options=[entry_words]+entry_alternatives
    entry_name=list_contains_all_list(stmt_cols,entry_options)
    if entry_name:
        stmt[entry_item]=stmt[entry_name]
    else:
        # Neither the entry nor a substitute was found
        stmt[entry_item]=np.nan

    return stmt
|
558
|
+
|
559
|
+
#==============================================================================
|
560
|
+
#==============================================================================
|
561
|
+
if __name__=='__main__':
    # Interactive scratch cell: sample inputs for get_financial_statements2_yahoo.
    # ticker is reassigned; only the last value reaches the call below.
    ticker='AAPL'
    ticker='00700.HK'

    report_period="all"
    max_count=3; max_sleep=30
    pre_fix_entry=False; post_fix_entry=True
    pre_printout=False; printout=True

    fsdf=get_financial_statements2_yahoo(ticker)
|
571
|
+
|
572
|
+
def get_financial_statements2_yahoo(ticker, \
                                    report_period="all", \
                                    max_count=3,max_sleep=30, \
                                    pre_fix_entry=False,post_fix_entry=True, \
                                    pre_printout=False,printout=False):
    """
    Purpose: retrieve the balance sheet, income statement and cash flow
    statement of one stock from Yahoo Finance and merge them into one table.

    Parameters:
    ticker: stock code; a five-digit Hong Kong code is shortened to four digits
    report_period: "all" (default), "annual" or "quarterly"
    max_count, max_sleep: retry settings passed to get_1statement_yahoo2
    pre_fix_entry: check/fill key entries in each statement before merging
    post_fix_entry: run check_fill_fs_yahoo on the merged table
    pre_printout: print each single statement after retrieval
    printout: print the merged table

    Returns: the merged DataFrame, or None when any of the three statements
    could not be retrieved.
    """
    # Hong Kong codes: five digits become four
    result,prefix,suffix=split_prefix_suffix(ticker)
    if result and suffix=='HK' and len(prefix)==5:
        ticker=ticker[1:]

    print(f" Searching for financial statements of {ticker} ... ...")

    # Fetch the three statements in the order they are merged; stop at the
    # first failure because a None cannot be merged
    statements=[]
    for report_type in ("balance sheet","income statement","cash flow statement"):
        one=get_1statement_yahoo2(ticker,report_type=report_type, \
                                  report_period=report_period, \
                                  max_count=max_count,max_sleep=max_sleep, \
                                  fix_entry=pre_fix_entry, \
                                  printout=pre_printout)
        if one is None:
            print(f" #Warning(get_financial_statements2_yahoo): failed to retrieve {report_type} of {ticker}")
            return None
        statements.append(one)
    df_bs,df_is,df_cfs=statements

    #=====Merge the three statements on the shared header columns
    head_cols=['asOfDate','periodType','currencyCode']
    df_bs_is=pd.merge(df_bs,df_is,on=head_cols)
    df=pd.merge(df_bs_is,df_cfs,on=head_cols)
    df['ticker']=ticker

    # Fill missing items on the merged table
    if post_fix_entry:
        df1=check_fill_fs_yahoo(df)
    else:
        df1=df

    print(f" Successfully retrieved financial statements of {ticker}")

    if printout:
        # Zeros are displayed as blanks; the returned table keeps them
        df2=df1.copy()
        df2.replace(0,np.nan,inplace=True)
        title_prefix="Comprehensive Financial Statement"
        print_stmt_yahoo2(df2,title_prefix)

    return df1
|
636
|
+
|
637
|
+
#==============================================================================
|
638
|
+
if __name__=='__main__':
    # Interactive scratch cell: fetch a raw cash flow statement and run the
    # cash-flow check/fill step on it.
    from yahooquery import Ticker
    stock = Ticker("AAPL")
    stmt=stock.cash_flow()

    df=check_fill_cfs_yahoo(stmt)
|
644
|
+
|
645
|
+
def check_fill_fs_yahoo(stmt):
    """
    Purpose: check and fill missing items of the merged three-statement table.

    Input: the merged table of the three statements retrieved from Yahoo.
    Output: a new table in which every NaN is replaced by 0 to ease later
    calculations; the input itself is left unchanged.

    Note: filling gaps inside a single statement is a rough approximation,
    which is why this step works on the merged table.
    """
    filled=stmt.fillna(0)
    return filled
|
666
|
+
|
667
|
+
|
668
|
+
|
669
|
+
"""
|
670
|
+
最终获得的表结构:
|
671
|
+
['asOfDate',
|
672
|
+
'periodType',
|
673
|
+
|
674
|
+
'AccountsPayable(应付账款)',
|
675
|
+
'AccountsReceivable(应收账款)',
|
676
|
+
'AccumulatedDepreciation(累计折旧)',
|
677
|
+
'AdditionalPaidInCapital(资本公积,资本溢价,附加资本;paid-in capital:实收资本;缴入资本)',
|
678
|
+
'AllowanceForDoubtfulAccountsReceivable(备抵应收呆帐)',
|
679
|
+
'AvailableForSaleSecurities(可供出售金融资产;trading securities: 交易性金融资产)',
|
680
|
+
'BuildingsAndImprovements(建筑物改良)',
|
681
|
+
'CapitalStock(股本)',
|
682
|
+
'CashAndCashEquivalents(现金及现金等价物)',
|
683
|
+
'CashCashEquivalentsAndShortTermInvestments(现金、现金等价物及短期投资)',
|
684
|
+
'CashEquivalents(现金等价物)',
|
685
|
+
'CashFinancial(?)',
|
686
|
+
'CommonStock(普通股)',
|
687
|
+
'CommonStockEquity(普通股权益?)',
|
688
|
+
'ConstructionInProgress(在建工程)',
|
689
|
+
'CurrentAssets(流动资产)',
|
690
|
+
'CurrentLiabilities(流动负债)',
|
691
|
+
'DividendsPayable(应付股利)',
|
692
|
+
'FinishedGoods(制成品)',
|
693
|
+
'GoodwillAndOtherIntangibleAssets(商誉及其他无形资产)',
|
694
|
+
'GrossAccountsReceivable(应收账款总额)',
|
695
|
+
'GrossPPE(固定资产总额)',
|
696
|
+
'InventoriesAdjustmentsAllowances(存货调整备抵)',
|
697
|
+
'Inventory(存货)',
|
698
|
+
'InvestedCapital(投入资本)',
|
699
|
+
'InvestmentinFinancialAssets(金融资产投资?)',
|
700
|
+
'LandAndImprovements(土地改良)',
|
701
|
+
'MachineryFurnitureEquipment(机械家具设备?)',
|
702
|
+
'MinorityInterest(少数股东损益?)',
|
703
|
+
'NetPPE(固定资产净值)',
|
704
|
+
'NetTangibleAssets(有形资产净值)',
|
705
|
+
'NonCurrentDeferredAssets(非流动递延资产)',
|
706
|
+
'NonCurrentDeferredTaxesAssets(非流动递延税项资产?)',
|
707
|
+
'NonCurrentDeferredTaxesLiabilities(非流动递延税金负债?)',
|
708
|
+
'OrdinarySharesNumber(普通股数量?)',
|
709
|
+
'OtherCurrentAssets(其他流动资产)',
|
710
|
+
'OtherCurrentLiabilities(其他流动负债)',
|
711
|
+
'OtherEquityInterest(其他股权)',
|
712
|
+
'OtherIntangibleAssets(其他无形资产)',
|
713
|
+
'OtherNonCurrentAssets(其他非流动资产)',
|
714
|
+
'OtherPayable(其它应付款)',
|
715
|
+
'OtherProperties(?)',
|
716
|
+
'OtherReceivables(其他应收款)',
|
717
|
+
'Payables(应付款项)',
|
718
|
+
'PrepaidAssets(预付资产;预付费用)',
|
719
|
+
'Properties(财产?)',
|
720
|
+
'RawMaterials(原材料)',
|
721
|
+
'RetainedEarnings(留存收益)',
|
722
|
+
'ShareIssued(股票发行)',
|
723
|
+
'StockholdersEquity(股东权益)',
|
724
|
+
'TangibleBookValue(有形资产账面价值)',
|
725
|
+
'TotalAssets(总资产)',
|
726
|
+
'TotalCapitalization(资本总额?)',
|
727
|
+
'TotalEquityGrossMinorityInterest(少数股东权益总额)',
|
728
|
+
'TotalLiabilitiesNetMinorityInterest(?)',
|
729
|
+
'TotalNonCurrentAssets(非流动资产总额)',
|
730
|
+
'TotalNonCurrentLiabilitiesNetMinorityInterest(?)',
|
731
|
+
'TotalTaxPayable(应缴税款总额)',
|
732
|
+
'TradeandOtherPayablesNonCurrent(?)',
|
733
|
+
'WorkInProcess(在制品)',
|
734
|
+
'WorkingCapital(营运资本)',
|
735
|
+
'Amortization(摊销)',
|
736
|
+
|
737
|
+
'BasicAverageShares(未稀释的平均股数?)',
|
738
|
+
'BasicEPS( 基本每股收益,指属于普通股股东的当期净利润,除以发行在外普通股的加权平均数,可按存在月数加权)',
|
739
|
+
'CostOfRevenue(主营业务成本,营收成本)',
|
740
|
+
'DepreciationAndAmortizationInIncomeStatement(损益表中的折旧和摊销)',
|
741
|
+
'DepreciationIncomeStatement(损益表中的折旧)',
|
742
|
+
'DilutedAverageShares(稀释后平均股数?)',
|
743
|
+
'DilutedEPS(考虑了可转换债券和股票期权可能行权对于流通在外股数的影响)',
|
744
|
+
'EBIT(息税前利润)',
|
745
|
+
'EBITDA(未计利息、税项、折旧及摊销前的利润)',
|
746
|
+
'GeneralAndAdministrativeExpense(一般管理费用)',
|
747
|
+
'GrossProfit(营业毛利)',
|
748
|
+
'ImpairmentOfCapitalAssets(资本资产减值)',
|
749
|
+
'InterestExpense(利息费用)',
|
750
|
+
'InterestExpenseNonOperating(非经营性利息费用)',
|
751
|
+
'InterestIncome(利息收益)',
|
752
|
+
'InterestIncomeNonOperating(非经营性利息收入)',
|
753
|
+
'MinorityInterests(少数股东权益)',
|
754
|
+
'NetIncome(净利润)',
|
755
|
+
'NetIncomeCommonStockholders(归属于普通股股东的净利润,用于计算EPS和PE)',
|
756
|
+
'NetIncomeContinuousOperations(扣非后净利润)',
|
757
|
+
'NetIncomeFromContinuingAndDiscontinuedOperation(来自持续经营和停止经营业务的净收入)',
|
758
|
+
'NetIncomeFromContinuingOperationNetMinorityInterest(不归属少数股东的扣非后净利润?)',
|
759
|
+
'NetIncomeIncludingNoncontrollingInterests(包括非控股权的净收入?)',
|
760
|
+
'NetInterestIncome(净利息收入)',
|
761
|
+
'NetNonOperatingInterestIncomeExpense(?)',
|
762
|
+
'NormalizedEBITDA(调整后EBITDA?)',
|
763
|
+
'NormalizedIncome(调整后利润?)',
|
764
|
+
'OperatingExpense(营业费用)',
|
765
|
+
'OperatingIncome(营业利润)',
|
766
|
+
'OperatingRevenue(营业收入)',
|
767
|
+
'OtherNonOperatingIncomeExpenses(其他营业外收入支出?)',
|
768
|
+
'OtherOperatingExpenses(其它营业费用)',
|
769
|
+
'OtherSpecialCharges(其他特殊费用)',
|
770
|
+
'OtherunderPreferredStockDividend(优先股股利下的其他项目)',
|
771
|
+
'PretaxIncome(税前利润)',
|
772
|
+
'ReconciledCostOfRevenue(对账后的经营收入成本?)',
|
773
|
+
'ReconciledDepreciation(对账后的折旧)',
|
774
|
+
'RentAndLandingFees(租金及土地费用?)',
|
775
|
+
'RentExpenseSupplemental(补充租金费用?)',
|
776
|
+
'ResearchAndDevelopment(研发费用)',
|
777
|
+
'SellingAndMarketingExpense(销售和市场营销费用)',
|
778
|
+
'SellingGeneralAndAdministration(销售及一般管理费用)',
|
779
|
+
'SpecialIncomeCharges(特殊收入的手续费)',
|
780
|
+
'TaxEffectOfUnusualItems(非常项目的税收影响)',
|
781
|
+
'TaxProvision(税金?)',
|
782
|
+
'TaxRateForCalcs(计算用的税率)',
|
783
|
+
'TotalExpenses(总费用)',
|
784
|
+
'TotalOperatingIncomeAsReported(报告的总营业利润)',
|
785
|
+
'TotalOtherFinanceCost(其他财务成本合计)',
|
786
|
+
'TotalRevenue(总收入)',
|
787
|
+
'TotalUnusualItems(非经常性项目总计)',
|
788
|
+
'TotalUnusualItemsExcludingGoodwill(不包括商誉的非经常项目合计)',
|
789
|
+
'WriteOff(冲销?)',
|
790
|
+
|
791
|
+
'BeginningCashPosition(期初现金头寸)',
|
792
|
+
'CapitalExpenditure(资本支出)',
|
793
|
+
'CashDividendsPaid(现金股利支付)',
|
794
|
+
'ChangeInCashSupplementalAsReported(现金补充变更报告?)',
|
795
|
+
'ChangeInInventory(存货变化)',
|
796
|
+
'ChangeInWorkingCapital(营运资本的变动额)',
|
797
|
+
'DepreciationAndAmortization(折旧摊销)',
|
798
|
+
'EndCashPosition(期末现金头寸)',
|
799
|
+
'FreeCashFlow(自由现金流)',
|
800
|
+
'InvestingCashFlow(投资现金流)',
|
801
|
+
'NetOtherFinancingCharges(其他融资费用净额)',
|
802
|
+
'NetOtherInvestingChanges(其他投资变动净额)',
|
803
|
+
'OperatingCashFlow(营运现金流)',
|
804
|
+
'OtherNonCashItems(其他非现金项目)'
|
805
|
+
]
|
806
|
+
"""
|
807
|
+
|
808
|
+
#==============================================================================
|
809
|
+
if __name__ == '__main__':
    # Interactive scratch cell for fs_entry_begin.
    # NOTE(review): get_financial_statements is not defined in the visible
    # part of this file; presumably it comes from another module - verify.
    fsdf=get_financial_statements('601398.SS')
    account_entry='TotalAssets'

    fsdf1=fs_entry_begin(fsdf,account_entry='TotalAssets',suffix='_begin')
|
814
|
+
|
815
|
+
def fs_entry_begin(fsdf,account_entry='TotalAssets',suffix='_begin'):
    """
    Purpose: add an opening-balance column for account_entry, using the
    closing value of the previous year's annual report as the opening value
    of the current annual and quarterly reports. Yahoo statements only.

    Parameters:
    fsdf: statement DataFrame with the columns asOfDate, periodType and
        account_entry; it is modified in place and returned
    account_entry: entry whose opening value is wanted
    suffix: appended to account_entry to name the new column

    The opening date of a row is the month and day of the first annual
    ('12M') report, placed in the year before the row's asOfDate. Rows whose
    opening date has no record get NaN. When the table holds no annual
    report at all, the whole new column is NaN.

    Returns: fsdf with the extra column account_entry+suffix.
    """
    import pandas as pd
    import numpy as np

    begin_col=account_entry+suffix
    as_of=pd.to_datetime(fsdf['asOfDate'])

    # The month/day of the fiscal year end comes from the first annual report
    annual_dates=as_of[fsdf['periodType']=='12M']
    if annual_dates.empty:
        fsdf[begin_col]=np.nan
        return fsdf
    fy_end=annual_dates.iloc[0]

    # Closing value per report date; the first record of a date wins
    closing={}
    for day,value in zip(as_of,fsdf[account_entry]):
        closing.setdefault(day,value)

    def _opening(day):
        # Clamp the day so that a Feb 29 year end maps to Feb 28 in other years
        first=pd.Timestamp(year=day.year-1,month=fy_end.month,day=1)
        begin=first.replace(day=min(fy_end.day,first.days_in_month))
        return closing.get(begin,np.nan)

    fsdf[begin_col]=[_opening(day) for day in as_of]

    return fsdf
|
835
|
+
|
836
|
+
#==============================================================================
|
837
|
+
#==============================================================================
|
838
|
+
#==============================================================================
|
839
|
+
#==============================================================================
|