siat 3.7.7__py3-none-any.whl → 3.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
siat/allin.py CHANGED
@@ -41,6 +41,9 @@ from siat.financials import *
41
41
  # 财务分析:雅虎源
42
42
  from siat.financials2 import *
43
43
 
44
+ # 财务报表:雅虎源
45
+ from siat.fin_stmt2_yahoo import *
46
+
44
47
  # 财务分析:中国
45
48
  from siat.financials_china import *
46
49
 
siat/common.py CHANGED
@@ -4171,6 +4171,77 @@ def firstLetterUpper(text):
4171
4171
 
4172
4172
  return utext
4173
4173
 
4174
+
4175
+ #==============================================================================
4176
+ if __name__ == '__main__':
4177
+ long_text = "Hello, this is a test string."
4178
+ short_text = "test strng"
4179
+
4180
+ similar_substring, similarity = find_similar_substring(long_text, short_text)
4181
+ if similarity:
4182
+ print(f"Similar substring found: {similar_substring}, Similarity: {similarity}")
4183
+ else:
4184
+ print("No similar substring found.")
4185
+
4186
+
4187
def find_similar_substring(long_string, short_string, threshold=0.7):
    """
    Find a substring of long_string that closely resembles short_string.

    Windows of long_string are scanned from the shortest size
    (len(short_string)) up to the whole string, left to right within each
    size. The first window whose difflib.SequenceMatcher ratio against
    short_string is strictly greater than threshold is returned.

    Parameters:
        long_string: text to search in.
        short_string: text to look for approximately.
        threshold: ratio a window must exceed to count as similar (default 0.7).

    Returns:
        (substring, similarity) for the first qualifying window, or
        (None, None) if no window qualifies or short_string is empty.
    """
    import difflib

    # An empty pattern would trivially "match" an empty window with ratio 1.0
    if not short_string:
        return None, None

    # SequenceMatcher caches its analysis of the second sequence, so keep
    # short_string fixed there and swap in each candidate window as seq1.
    matcher = difflib.SequenceMatcher(None, "", short_string)

    total = len(long_string)
    for size in range(len(short_string), total + 1):
        for start in range(total - size + 1):
            substring = long_string[start:start + size]

            # Score THIS window (the original scored the whole long_string
            # on every iteration, so the result never depended on the window)
            matcher.set_seq1(substring)
            similarity = matcher.ratio()

            if similarity > threshold:
                return substring, similarity

    # No window was similar enough
    return None, None
4212
+
4213
+
4214
+ #==============================================================================
4215
+ if __name__ == '__main__':
4216
+ str1 = "kitten"
4217
+ str2 = "sitting"
4218
+
4219
+ string_similarity(str1,str2)
4220
+
4221
+
4222
def string_similarity(str1,str2,ignore_cases=True):
    """
    Compute the textual similarity of two strings.

    Parameters:
        str1, str2: the strings to compare.
        ignore_cases: when True (default) both strings are lower-cased
            before the comparison.

    Returns:
        The difflib.SequenceMatcher ratio of the two strings.
    """
    import difflib

    # Normalise case only when requested
    left,right=(str1.lower(),str2.lower()) if ignore_cases else (str1,str2)

    return difflib.SequenceMatcher(None, left, right).ratio()
4244
+
4174
4245
  #==============================================================================
4175
4246
  if __name__ == '__main__':
4176
4247
  string = "HeLLo, Welcome to this New WorLd!"
@@ -4181,7 +4252,7 @@ if __name__ == '__main__':
4181
4252
  def contains_any(string, words):
4182
4253
  """
4183
4254
 
4184
- 功能:测试字符串string中是否含有字符串列表words中的任意一个元素
4255
+ 功能:测试字符串string中是否含有字符串列表words中的任意一个元素,忽略字母大小写
4185
4256
  参数:
4186
4257
  string:字符串,大小写不限
4187
4258
  words:字符串列表,大小写不限
@@ -4208,6 +4279,183 @@ def contains_any(string, words):
4208
4279
  #检查字符串new_string是否包含列表new_words_list中的任何元素
4209
4280
  return any((word in new_string) for word in new_words_list)
4210
4281
 
4282
+ #==============================================================================
4283
+ if __name__ == '__main__':
4284
+ string = "HeLLo, Welcome to this New WorLd!"
4285
+ words = ["Hello", "World"]
4286
+ words = ["Hello", "World","the"]
4287
+
4288
+ contains_all(string, words)
4289
+
4290
def contains_all(string, words):
    """
    Test whether string contains every element of words, ignoring letter case.

    Parameters:
        string: the text to search in; any letter case.
        words: list of strings that must all occur in string; any letter case.

    Returns:
        True if every word occurs in string (also True for an empty list),
        otherwise False.

    Note:
        Both sides are lower-cased before comparison. Each word is tested as
        a whole substring; the previous version handed a single word to
        contains_any, which expects a LIST of words and would therefore
        iterate over the word's characters.
    """
    lowered=string.lower()

    # all() stops at the first missing word, like the original break
    return all((w.lower() in lowered) for w in words)
4307
+
4308
+
4309
+ #==============================================================================
4310
+ if __name__ == '__main__':
4311
+ alist = ["CurrentDebt",
4312
+ "CurrentDebtAndCapitalLeaseObligation",
4313
+ "CurrentDeferredLiabilities",
4314
+ "CurrentLiabilities",
4315
+ "OtherCurrentBorrowings",
4316
+ "OtherCurrentLiabilities",
4317
+ "OtherNonCurrentLiabilities",
4318
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4319
+
4320
+ alist = [
4321
+ "CurrentDebtAndCapitalLeaseObligation",
4322
+ "CurrentDeferredLiabilities",
4323
+ "CurrentLiabilities",
4324
+ "OtherCurrentBorrowings",
4325
+ "OtherCurrentLiabilities",
4326
+ "OtherNonCurrentLiabilities",
4327
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4328
+
4329
+ item_words = ["Current", "Debt"]
4330
+ item_words = ["Current", "Liabilities"]
4331
+
4332
+ perfect_match=True
4333
+
4334
+ list_contains_all(alist, item_words)
4335
+
4336
def list_contains_all(alist, item_words,perfect_match=True):
    """
    Pick the element of alist that best matches the keywords in item_words.

    Parameters:
        alist: list of strings to search; any letter case.
        item_words: list of keyword strings; any letter case.
        perfect_match: when True, only elements for which
            contains_all(element, item_words) holds are scored; when False,
            every element is scored.

    Returns:
        A tuple (result, best_similarity). result is the element with the
        highest string_similarity to the concatenated keywords (the first
        one wins on ties), or False when no element scored above 0;
        best_similarity is that score (0 when nothing matched).
    """
    DEBUG=False

    # Concatenate the keywords into one string used for similarity scoring
    words=''.join(item_words)
    if DEBUG:
        print(f" DEBUG: item_words={item_words}, words={words}")

    result,best_similarity=False,0
    for e in alist:
        if DEBUG:
            print(f" DEBUG: e={e}")

        # With perfect_match an element must contain every keyword to be scored
        qualifies=(not perfect_match) or contains_all(e,item_words)
        similarity=string_similarity(e,words) if qualifies else 0

        if DEBUG:
            print(f" DEBUG: item_words={item_words}, e={e}, similarity={similarity}")

        # Strict comparison keeps the first element among equal scores
        if similarity > best_similarity:
            best_similarity,result=similarity,e

    return result,best_similarity
4380
+
4381
+ if __name__ == '__main__':
4382
+ alist = ["CurrentDebt",
4383
+ "CurrentDebtAndCapitalLeaseObligation",
4384
+ "CurrentDeferredLiabilities",
4385
+ "CurrentLiabilities",
4386
+ "OtherCurrentBorrowings",
4387
+ "OtherCurrentLiabilities",
4388
+ "OtherNonCurrentLiabilities",
4389
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4390
+
4391
+ alist = [
4392
+ "CurrentDebtAndCapitalLeaseObligation",
4393
+ "CurrentDeferredLiabilities",
4394
+ "CurrentLiabilities",
4395
+ "OtherCurrentBorrowings",
4396
+ "OtherCurrentLiabilities",
4397
+ "OtherNonCurrentLiabilities",
4398
+ "TotalNonCurrentLiabilitiesNetMinorityInterest"]
4399
+
4400
+ item_words_list=[["Current","Debt"],["Current","Liabilities"]]
4401
+ item_words_list=[["Current","Liabilibities"],["Current","Debt"]]
4402
+
4403
+ list_contains_all_list(alist, item_words_list)
4404
+
4405
def list_contains_all_list(alist, item_words_list):
    """
    Pick the element of alist that best matches any keyword group.

    Parameters:
        alist: list of strings to search; any letter case.
        item_words_list: list of keyword lists. The first is the preferred
            group; the following ones are alternatives.

    Returns:
        The element with the highest similarity over all keyword groups
        (the first one wins on ties), or False when nothing scored above 0.

    Note:
        Each group is evaluated with list_contains_all(..., perfect_match=False),
        i.e. elements are ranked by similarity only and are not required to
        contain every keyword.
    """
    DEBUG=False

    # Score every keyword group first, then keep the overall winner
    outcomes=[list_contains_all(alist, iwords,perfect_match=False)
              for iwords in item_words_list]

    best_result,best_similarity=False,0
    for iwords,(result,similarity) in zip(item_words_list,outcomes):
        if DEBUG:
            print(" DEBUG: iwords={0}, alist={1}".format(iwords,alist))
            print('')
            print(f" DEBUG: result={result}, similarity={similarity:.2f}")

        # Strict comparison: earlier groups win ties
        if similarity > best_similarity:
            best_similarity,best_result=similarity,result

    return best_result
4436
+
4437
+
4438
+ #==============================================================================
4439
+ if __name__ == '__main__':
4440
+ max_sleep=30
4441
+
4442
+ sleep_random(max_sleep)
4443
+
4444
def sleep_random(max_sleep=30):
    """
    Suspend execution for a random whole number of seconds.

    Intended to space out consecutive requests so the data source does not
    block the caller's IP address.

    Parameters:
        max_sleep: upper bound of the pause in seconds (default 30). The
            pause is drawn uniformly from 1..max_sleep. Values below 1
            result in no pause at all.

    Returns:
        None
    """
    import random
    import time

    upper=int(max_sleep)

    # random.randint(1, upper) raises ValueError for upper < 1; treat that
    # as "do not wait" instead of crashing the caller's retry loop
    if upper < 1:
        return

    time.sleep(random.randint(1,upper))

    return
4458
+
4211
4459
  #==============================================================================
4212
4460
  if __name__ == '__main__':
4213
4461
  s = "Hello, 世界! This is a test string with symbols #$%^&*()."
@@ -0,0 +1,839 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 本模块功能:上市公司的财务报表分析,数据层
4
+ 特点1:科目项目不采用字符串匹配方法,采用contains_any和contains_all匹配方法!
5
+ 特点2:以苹果财报的项目名称为基准!其他股票的财报项目名称若不同则转换为苹果财报的名称
6
+ 所属工具包:证券投资分析工具SIAT
7
+ SIAT:Security Investment Analysis Tool
8
+ 创建日期:2024年11月28日
9
+ 最新修订日期:2024年11月28日
10
+ 作者:王德宏 (WANG Dehong, Peter)
11
+ 作者单位:北京外国语大学国际商学院
12
+ 版权所有:王德宏
13
+ 用途限制:仅限研究与教学使用,不可商用!商用需要额外授权。
14
+ 特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
15
+ """
16
+
17
+ #==============================================================================
18
+ #关闭所有警告
19
+ import warnings; warnings.filterwarnings('ignore')
20
+
21
+ from siat.common import *
22
+ import pandas as pd
23
+ import numpy as np
24
+ #==============================================================================
25
+ #本模块使用yahooquery插件
26
+ #==============================================================================
27
+ if __name__=='__main__':
28
+ symbol='AAPL' #以其财报项目名称作为基准
29
+
30
+ symbol='JD'
31
+ symbol='INTL'
32
+ symbol='MSFT'
33
+ symbol='600519.SS'
34
+ symbol='00700.HK'
35
+
36
+ symbol='601398.SS'
37
+ symbol='601328.SS'
38
+
39
+ max_count=3
40
+ max_sleep=30
41
+
42
+ report_type="balance sheet"
43
+ printout=True
44
+
45
+ bsdf_raw=get_1statement_yahoo2(symbol,report_type="balance sheet",fix_entry=False)
46
+ bsdf_fix=get_1statement_yahoo2(symbol,report_type="balance sheet",fix_entry=True)
47
+
48
+ isdf_raw=get_1statement_yahoo2(symbol,report_type="income statement",fix_entry=False)
49
+
50
+
51
+ cfsdf_raw=get_1statement_yahoo2(symbol,report_type="cash flow",fix_entry=False)
52
+
53
+
54
+
55
def get_1statement_yahoo2(symbol,report_type="balance sheet", \
                          report_period="all", \
                          max_count=3,max_sleep=30, \
                          fix_entry=True,printout=False):
    """
    Retrieve one financial statement of one stock from Yahoo Finance
    (via yahooquery), annual and/or quarterly.

    Parameters:
        symbol: stock code; five-digit Hong Kong codes are shortened to four digits.
        report_type: statement to fetch. Anything containing "balance" means
            balance sheet, anything containing "income" means income
            statement, everything else means cash flow statement.
        report_period: "all" (annual + quarterly, default), "annual" or "quarterly".
        max_count: total number of attempts per fetch (default 3, at least 1).
        max_sleep: maximum random pause in seconds between attempts.
        fix_entry: whether to check and fill key entries (default True).
        printout: whether to print the retrieved statement (default False).

    Returns:
        A DataFrame on success, None on failure.
    """
    report_type=report_type.lower()
    if 'balance' in report_type:
        report_type="balance sheet"
    elif 'income' in report_type:
        report_type="income statement"
    else:
        report_type="cash flow statement"
    print(f" Retrieving {report_type} of {symbol} ... ...")

    symbol=symbol.upper()
    result,prefix,suffix=split_prefix_suffix(symbol)
    # Hong Kong codes: five digits become four
    if result and suffix=='HK' and len(prefix)==5:
        symbol=symbol[1:]

    #=====Fetch the statements================================================
    from yahooquery import Ticker
    try:
        stock = Ticker(symbol)
    except Exception:
        print(" #Warning(get_1statement_yahoo2): Yahoo Finance currently unaccessable")
        return None

    # Annual reports
    stmta=None
    if contains_any(report_period,['all','annual']):
        stmta=_fetch_stmt_yahoo2(stock,report_type,max_count,max_sleep)

        # Fetch failed: tell an unknown symbol apart from an unreachable site
        if stmta is None:
            if test_yahoo_finance():
                print(" #Warning(get_1statement_yahoo2): {} not found for annual reports".format(symbol))
            else:
                print(" #Warning(get_1statement_yahoo2): sorry, Yahoo Finance currently unaccessable")
            return None

    # Quarterly reports; these may include the annual report dates as well,
    # so duplicates are removed after merging
    stmtq=None
    if contains_any(report_period,['all','quarterly']):
        stmtq=_fetch_stmt_yahoo2(stock,report_type,max_count,max_sleep,frequency="q")

        # A failed fetch must not leak a non-DataFrame value into pd.concat
        if stmtq is None:
            print(" #Warning(get_1statement_yahoo2): {} not found for quarterly reports".format(symbol))

    # Merge annual and quarterly statements
    frames=[s for s in (stmta,stmtq) if s is not None]
    if not frames:
        print(" #Error(get_1statement_yahoo2): retrieved no periodic reports for",symbol)
        return None
    stmt=pd.concat(frames) if len(frames) > 1 else frames[0]

    # Sort, then drop duplicated report dates keeping the first record
    # (for equal dates '12M' sorts before '3M', so the annual row is kept)
    stmt.sort_values(by=['asOfDate','periodType'],inplace=True)
    stmt.drop_duplicates(subset=['asOfDate'],keep='first',inplace=True)

    #=====Check key entries and fill missing ones=============================
    if fix_entry:
        if report_type=="balance sheet":
            stmt=check_fill_bs_yahoo(stmt)
        elif report_type=="income statement":
            stmt=check_fill_is_yahoo(stmt)
        else:
            stmt=check_fill_cfs_yahoo(stmt)

    # Reorder columns: fixed head columns first, the rest alphabetically.
    # Head columns that are absent are skipped instead of raising.
    head_cols=['asOfDate','periodType','currencyCode']
    all_cols=list(stmt)
    present_head=[c for c in head_cols if c in all_cols]
    other_cols=sorted(c for c in all_cols if c not in head_cols)
    stmt=stmt[present_head+other_cols]

    # Optional printout for inspection
    if printout:
        print_stmt_yahoo2(stmt,title_prefix=report_type.title())

    return stmt


def _fetch_stmt_yahoo2(stock,report_type,max_count,max_sleep,**fetch_kwargs):
    """Fetch one statement from a yahooquery Ticker with retries; return a DataFrame or None."""
    if report_type=="balance sheet":
        fetch=stock.balance_sheet
    elif report_type=="income statement":
        fetch=stock.income_statement
    else:
        fetch=stock.cash_flow

    attempts=max(1,max_count)
    for attempt in range(attempts):
        stmt=fetch(**fetch_kwargs)

        # Success: anything other than a DataFrame is treated as a failure
        if isinstance(stmt,pd.DataFrame):
            return stmt

        # Do not wait after the last attempt or when Yahoo cannot be reached
        if attempt==attempts-1 or not test_yahoo_finance():
            break
        sleep_random(max_sleep)

    return None
191
+
192
+
193
+ #==============================================================================
194
+ if __name__=='__main__':
195
+ symbol="AAPL"
196
+
197
+ from yahooquery import Ticker
198
+ stock = Ticker(symbol)
199
+ stmt=stock.balance_sheet()
200
+
201
+ df=check_fill_bs_yahoo(stmt)
202
+
203
def check_fill_bs_yahoo(stmt):
    """
    Check and fill selected balance-sheet entries of a statement fetched
    from Yahoo. Entry names follow Apple's statements.

    Parameters:
        stmt: the raw balance sheet DataFrame.

    Returns:
        The DataFrame after every entry below has been passed through
        check_fill_entry, in the listed order.

    Note:
        Filling missing entries from the balance sheet alone is a rough
        approximation; it can be postponed until the three statements
        have been merged.
    """
    # (entry name, keywords of the entry, keyword groups of alternative entries);
    # keywords are matched ignoring letter case
    entries=(
        #=====Assets
        ('AccountsReceivable',["accounts","receivable"],[["receivables"]]),
        ('Inventory',["inventory"],[]),
        #=====Liabilities
        ('CurrentDebt',["current","debt"],[["current","liabilities"]]),
        ('CurrentLiabilities',["current","liabilities"],[["current","debt"]]),
        ('AccountsPayable',["accounts","payable"],[["payables"]]),
        ('TotalDebt',["total","debt"],[["total","liabilities"]]),
        ('TotalLiabilities',["total","liabilities"],[["total","debt"]]),
        #=====Equity
        ('TotalEquity',["total","equity"],[["stock","holder","quity"]]),
    )

    for entry_item,entry_words,entry_alternatives in entries:
        stmt=check_fill_entry(stmt,entry_item,entry_words,entry_alternatives)

    return stmt
290
+
291
+ #==============================================================================
292
+ if __name__=='__main__':
293
+ from yahooquery import Ticker
294
+ stock = Ticker("AAPL")
295
+ stmt=stock.income_statement()
296
+
297
+ df=check_fill_is_yahoo(stmt)
298
+
299
def check_fill_is_yahoo(stmt):
    """
    Check and fill selected income-statement entries of a statement fetched
    from Yahoo.

    Parameters:
        stmt: the raw income statement DataFrame.

    Returns:
        The DataFrame with the entries below checked, mapped from
        alternative entries, or derived where they are entirely empty.

    Note:
        Filling missing entries from the income statement alone is a rough
        approximation; it can be done after the three statements are merged.
    """
    #=====Costs and expenses
    # Interest expense
    stmt=check_fill_entry(stmt,'InterestExpense',["interest","expense"],
                          [["interest","expense","operating"]])

    # Cost of revenue
    stmt=check_fill_entry(stmt,'CostOfRevenue',["cost","revenue"],
                          [["reconciled","cost","revenue"]])

    # Operating expense
    stmt=check_fill_entry(stmt,'OperatingExpense',["operating","expense"],
                          [["cost","revenue"]])

    # Depreciation
    stmt=check_fill_entry(stmt,'Depreciation',["depreciation"],
                          [["reconciled","depreciation"]])

    #=====Revenue and profit
    # Operating revenue
    stmt=check_fill_entry(stmt,'OperatingRevenue',["operating","revenue"],
                          [["total","revenue"]])

    # Operating income; derived from revenue and expense when entirely empty
    stmt=check_fill_entry(stmt,'OperatingIncome',["operating","income"],[])
    if stmt['OperatingIncome'].isna().all():
        stmt['OperatingIncome']=stmt['OperatingRevenue']-stmt['OperatingExpense']

    # EBITDA
    stmt=check_fill_entry(stmt,'EBITDA',["ebitda"],[])

    # EBIT: the keyword must be "ebit". With "ebitda" the lookup resolves to
    # the EBITDA column and overwrites EBIT with it.
    stmt=check_fill_entry(stmt,'EBIT',["ebit"],[])
    if stmt['EBIT'].isna().all():
        # EBITDA = EBIT + depreciation & amortisation, hence the subtraction
        stmt['EBIT']=stmt['EBITDA']-stmt['Depreciation']

    # Gross profit
    stmt=check_fill_entry(stmt,'GrossProfit',["gross","profit"],
                          [["operating","income"],["pretax","income"]])

    # NOTE(review): this fallback equals the operating-income formula above;
    # revenue minus CostOfRevenue may have been intended - confirm.
    if stmt['GrossProfit'].isna().all():
        stmt['GrossProfit']=stmt['OperatingRevenue']-stmt['OperatingExpense']

    # Last resort: use EBITDA
    if stmt['GrossProfit'].isna().all():
        stmt['GrossProfit']=stmt['EBITDA']

    return stmt
401
+
402
+ #==============================================================================
403
+ if __name__=='__main__':
404
+ from yahooquery import Ticker
405
+ stock = Ticker("AAPL")
406
+ stmt=stock.cash_flow()
407
+
408
+ df=check_fill_cfs_yahoo(stmt)
409
+
410
def check_fill_cfs_yahoo(stmt):
    """
    Check and fill selected cash-flow-statement entries of a statement
    fetched from Yahoo.

    Parameters:
        stmt: the raw cash flow statement DataFrame.

    Returns:
        The DataFrame after every entry below has been passed through
        check_fill_entry, in the listed order.

    Note:
        Filling missing entries from the cash flow statement alone is a rough
        approximation; it can be done after the three statements are merged.
    """
    # (entry name, keywords of the entry, keyword groups of alternative entries);
    # keywords are matched ignoring letter case
    entries=(
        # Cash dividends paid
        ('CashDividendsPaid',
         ["cash","dividends","paid"],
         [["common","stock","dividend","paid"]]),
        # Cash flow from financing activities
        ('CashFlowFromFinancingActivities',
         ["cash","flow","from","financing","activities"],
         [["cash","flow","from","continuing","financing","activities"],["financing","cash","flow"]]),
        # Cash flow from investing activities
        ('CashFlowFromInvestingActivities',
         ["cash","flow","from","investing","activities"],
         [["cash","flow","from","continuing","investing","activities"],["investing","cash","flow"]]),
        # Cash flow from operating activities
        ('CashFlowFromOperatingActivities',
         ["cash","flow","from","operating","activities"],
         [["cash","flow","from","continuing","operating","activities"],["operating","cash","flow"]]),
    )

    for entry_item,entry_words,entry_alternatives in entries:
        stmt=check_fill_entry(stmt,entry_item,entry_words,entry_alternatives)

    return stmt
453
+ #==============================================================================
454
+ if __name__=='__main__':
455
+ from yahooquery import Ticker
456
+ stock = Ticker("AAPL")
457
+ stmt=stock.cash_flow()
458
+
459
+ title_prefix="Cash Flow Statement"
460
+
461
+ print_stmt_yahoo2(stmt,title_prefix)
462
+
463
def print_stmt_yahoo2(stmt,title_prefix):
    """
    Print a financial statement fetched from Yahoo, with amounts in millions.

    Parameters:
        stmt: statement DataFrame containing at least the columns
            'asOfDate', 'periodType' and 'currencyCode'; an optional
            'ticker' column supplies the symbol, otherwise the first index
            label is used.
        title_prefix: text placed after the symbol in the title.

    Returns:
        None; the table is rendered through df_display_CSS.
    """
    stmtprt1=stmt.copy()

    if 'ticker' in list(stmtprt1):
        # Positional access: the Series index is not guaranteed to be 0..n-1
        symbol=stmtprt1['ticker'].iloc[0]
        del stmtprt1['ticker']
    else:
        symbol=stmtprt1.index[0]

    # Use the formatted report date as the index
    stmtprt1['reportDate']=stmtprt1['asOfDate'].apply(lambda x: x.strftime("%y-%m-%d"))
    stmtprt1.set_index('reportDate',inplace=True)
    del stmtprt1['asOfDate']

    currencyCode=stmtprt1['currencyCode'].values[0]
    del stmtprt1['currencyCode']

    # Express every column except periodType in millions, two decimals
    cols1=list(stmtprt1)
    cols1.remove('periodType')
    million=1000000
    for c in cols1:
        stmtprt1[c]=stmtprt1[c].apply(lambda x: round(x/million,2))

    # Transpose: one row per item, report dates as columns, newest first
    stmtprt2=stmtprt1.T
    cols=list(stmtprt2)
    cols.sort(reverse=True)
    stmtprt2=stmtprt2[cols]
    stmtprt2['Item']=stmtprt2.index
    stmtprt2=stmtprt2[['Item']+cols]

    titletxt=f"{symbol}: {title_prefix}, in {currencyCode} millions"
    footnote="Note: 12M indicates annual report, 3M quarterly reports"
    # 'titile_font_size' is spelled as in the existing call to df_display_CSS
    df_display_CSS(stmtprt2,titletxt=titletxt,footnote=footnote, \
                   facecolor='papayawhip',decimals=2, \
                   first_col_align='left',second_col_align='right', \
                   last_col_align='right',other_col_align='right', \
                   titile_font_size='15px',heading_font_size='11px', \
                   data_font_size='11px',footnote_font_size='13px')

    return
507
+
508
+ #==============================================================================
509
+ if __name__ == '__main__':
510
+ entry_item='CurrentDebt' #主项科目
511
+ entry_words=["current","debt"] #主项科目的关键词,忽略字母大小写
512
+ #可替代科目组的关键词列表组
513
+ entry_alternatives=[["current","liabilities"],["current","borrowing"],["current","obligation"]]
514
+
515
+ entry_item in list(stmt)
516
+ stmt=check_fill_entry(stmt,entry_item,entry_words,entry_alternatives)
517
+ stmt[entry_item]
518
+
519
def check_fill_entry(stmt,entry_item,entry_words,entry_alternatives):
    """
    Make sure the entry entry_item exists in the raw statement stmt.

    - No matching column at all: the entry is created and set to NaN.
    - Best-matching column has a different name: its values are copied
      into entry_item (the source column is kept).
    - Entry exists but is entirely empty: it is filled from the best
      matching alternative column, if there is one; otherwise it is left
      as is.

    Parameters:
        stmt: raw statement DataFrame (modified in place and returned).
        entry_item: name of the entry to check.
        entry_words: keyword list describing the entry.
        entry_alternatives: list of keyword lists describing alternative entries.

    Returns:
        The updated statement DataFrame.
    """
    stmt_cols=list(stmt)

    # Primary keywords first, then the alternatives
    entry_options=[entry_words]+entry_alternatives

    # Best match by similarity
    entry_name=list_contains_all_list(stmt_cols,entry_options)

    # Neither the entry nor any alternative was found
    if not entry_name:
        stmt[entry_item]=np.nan
        return stmt

    # Found under a different name: map it onto the expected name
    if entry_name != entry_item:
        stmt[entry_item]=stmt[entry_name]
        return stmt

    # Found under its own name: fill only if it is entirely empty
    if stmt[entry_item].isna().all():
        # Exclude the empty entry itself from the candidates
        candidates=[c for c in stmt_cols if c != entry_item]
        entry_name_alternative=list_contains_all_list(candidates,entry_alternatives)

        # list_contains_all_list returns False when nothing matches;
        # indexing stmt with False would raise KeyError
        if entry_name_alternative:
            stmt[entry_item]=stmt[entry_name_alternative]

    return stmt
558
+
559
+ #==============================================================================
560
+ #==============================================================================
561
+ if __name__=='__main__':
562
+ ticker='AAPL'
563
+ ticker='00700.HK'
564
+
565
+ report_period="all"
566
+ max_count=3; max_sleep=30
567
+ pre_fix_entry=False; post_fix_entry=True
568
+ pre_printout=False; printout=True
569
+
570
+ fsdf=get_financial_statements2_yahoo(ticker)
571
+
572
def get_financial_statements2_yahoo(ticker, \
                                    report_period="all", \
                                    max_count=3,max_sleep=30, \
                                    pre_fix_entry=False,post_fix_entry=True, \
                                    pre_printout=False,printout=False):
    """
    Retrieve the balance sheet, income statement and cash flow statement of
    one stock from Yahoo Finance and merge them into one DataFrame.

    Parameters:
        ticker: stock code; five-digit Hong Kong codes are shortened to four digits.
        report_period: "all" (default), "annual" or "quarterly".
        max_count: number of attempts per statement.
        max_sleep: maximum random pause in seconds between attempts.
        pre_fix_entry: check/fill entries in each statement before merging.
        post_fix_entry: check/fill entries after merging (default True).
        pre_printout: print each statement as it is retrieved.
        printout: print the merged statement.

    Returns:
        The merged DataFrame, or None if any of the three statements could
        not be retrieved.
    """
    # Hong Kong codes: five digits become four
    result,prefix,suffix=split_prefix_suffix(ticker)
    if result and suffix=='HK' and len(prefix)==5:
        ticker=ticker[1:]

    print(f" Searching for financial statements of {ticker} ... ...")

    # Fetch the three statements with identical settings; stop at the first
    # failure because a None result cannot be merged
    fetched=[]
    for report_type in ("balance sheet","income statement","cash flow statement"):
        one=get_1statement_yahoo2(ticker,report_type=report_type, \
                                  report_period=report_period, \
                                  max_count=max_count,max_sleep=max_sleep, \
                                  fix_entry=pre_fix_entry, \
                                  printout=pre_printout)
        if one is None:
            print(f" #Warning(get_financial_statements2_yahoo): failed to retrieve {report_type} of {ticker}")
            return None
        fetched.append(one)
    df_bs,df_is,df_cfs=fetched

    #=====Merge the three statements
    head_cols=['asOfDate','periodType','currencyCode']
    # Merge 1: balance sheet + income statement
    df_bs_is=pd.merge(df_bs,df_is,on=head_cols)

    # Merge 2: + cash flow statement
    df=pd.merge(df_bs_is,df_cfs,on=head_cols)
    df['ticker']=ticker

    # Fill missing entries after merging
    if post_fix_entry:
        df1=check_fill_fs_yahoo(df)
    else:
        df1=df

    print(f" Successfully retrieved financial statements of {ticker}")

    if printout:
        # Show zeros as blanks in the printout only
        df2=df1.copy()
        df2.replace(0,np.nan,inplace=True)
        title_prefix="Comprehensive Financial Statement"
        print_stmt_yahoo2(df2,title_prefix)

    return df1
636
+
637
+ #==============================================================================
638
if __name__=='__main__':
    # Scratch check for check_fill_fs_yahoo (the function defined right
    # below); the previous version called check_fill_cfs_yahoo by mistake
    from yahooquery import Ticker
    stock = Ticker("AAPL")
    stmt=stock.cash_flow()

    df=check_fill_fs_yahoo(stmt)
644
+
645
def check_fill_fs_yahoo(stmt):
    """
    Check and fill missing entries of the merged three-statement DataFrame.

    Parameters:
        stmt: the merged DataFrame of the three statements.

    Returns:
        A new DataFrame in which every NaN has been replaced by 0 to ease
        later calculations; stmt itself is not modified. No further entries
        are filled at present.
    """
    # fillna without inplace returns a filled copy and leaves stmt untouched
    return stmt.fillna(0)
666
+
667
+
668
+
669
+ """
670
+ 最终获得的表结构:
671
+ ['asOfDate',
672
+ 'periodType',
673
+
674
+ 'AccountsPayable(应付账款)',
675
+ 'AccountsReceivable(应收账款)',
676
+ 'AccumulatedDepreciation(累计折旧)',
677
+ 'AdditionalPaidInCapital(资本公积,资本溢价,附加资本;paid-in capital:实收资本;缴入资本)',
678
+ 'AllowanceForDoubtfulAccountsReceivable(备抵应收呆帐)',
679
+ 'AvailableForSaleSecurities(可供出售金融资产;trading securities: 交易性金融资产)',
680
+ 'BuildingsAndImprovements(建筑物改良)',
681
+ 'CapitalStock(股本)',
682
+ 'CashAndCashEquivalents(现金及现金等价物)',
683
+ 'CashCashEquivalentsAndShortTermInvestments(现金、现金等价物及短期投资)',
684
+ 'CashEquivalents(现金等价物)',
685
+ 'CashFinancial(?)',
686
+ 'CommonStock(普通股)',
687
+ 'CommonStockEquity(普通股权益?)',
688
+ 'ConstructionInProgress(在建工程)',
689
+ 'CurrentAssets(流动资产)',
690
+ 'CurrentLiabilities(流动负债)',
691
+ 'DividendsPayable(应付股利)',
692
+ 'FinishedGoods(制成品)',
693
+ 'GoodwillAndOtherIntangibleAssets(商誉及其他无形资产)',
694
+ 'GrossAccountsReceivable(应收账款总额)',
695
+ 'GrossPPE(固定资产总额)',
696
+ 'InventoriesAdjustmentsAllowances(存货调整备抵)',
697
+ 'Inventory(存货)',
698
+ 'InvestedCapital(投入资本)',
699
+ 'InvestmentinFinancialAssets(金融资产投资?)',
700
+ 'LandAndImprovements(土地改良)',
701
+ 'MachineryFurnitureEquipment(机械家具设备?)',
702
+ 'MinorityInterest(少数股东损益?)',
703
+ 'NetPPE(固定资产净值)',
704
+ 'NetTangibleAssets(有形资产净值)',
705
+ 'NonCurrentDeferredAssets(非流动递延资产)',
706
+ 'NonCurrentDeferredTaxesAssets(非流动递延税项资产?)',
707
+ 'NonCurrentDeferredTaxesLiabilities(非流动递延税金负债?)',
708
+ 'OrdinarySharesNumber(普通股数量?)',
709
+ 'OtherCurrentAssets(其他流动资产)',
710
+ 'OtherCurrentLiabilities(其他流动负债)',
711
+ 'OtherEquityInterest(其他股权)',
712
+ 'OtherIntangibleAssets(其他有形资产)',
713
+ 'OtherNonCurrentAssets(其他非流动资产)',
714
+ 'OtherPayable(其它应付款)',
715
+ 'OtherProperties(?)',
716
+ 'OtherReceivables(其他应收款)',
717
+ 'Payables(应付款项)',
718
+ 'PrepaidAssets(预付资产;预付费用)',
719
+ 'Properties(财产?)',
720
+ 'RawMaterials(原材料)',
721
+ 'RetainedEarnings(留存收益)',
722
+ 'ShareIssued(股票发行)',
723
+ 'StockholdersEquity(股东权益)',
724
+ 'TangibleBookValue(有形资产账面价值)',
725
+ 'TotalAssets(总资产)',
726
+ 'TotalCapitalization(资本总额?)',
727
+ 'TotalEquityGrossMinorityInterest(少数股东权益总额)',
728
+ 'TotalLiabilitiesNetMinorityInterest(?)',
729
+ 'TotalNonCurrentAssets(非流动资产总额)',
730
+ 'TotalNonCurrentLiabilitiesNetMinorityInterest(?)',
731
+ 'TotalTaxPayable(应缴税款总额)',
732
+ 'TradeandOtherPayablesNonCurrent(?)',
733
+ 'WorkInProcess(在制品)',
734
+ 'WorkingCapital(营运资本)',
735
+ 'Amortization(摊销)',
736
+
737
+ 'BasicAverageShares(未稀释的平均股数?)',
738
+ 'BasicEPS( 基本每股收益,指属于普通股股东的当期净利润,除以发行在外普通股的加权平均数,可按存在月数加权)',
739
+ 'CostOfRevenue(主营业务成本,营收成本)',
740
+ 'DepreciationAndAmortizationInIncomeStatement(损益表中的折旧和摊销)',
741
+ 'DepreciationIncomeStatement(损益表中的折旧)',
742
+ 'DilutedAverageShares(稀释后平均股数?)',
743
+ 'DilutedEPS(考虑了可转换债券和股票期权可能行权对于流通在外股数的影响)',
744
+ 'EBIT(息税前利润)',
745
+ 'EBITDA(未计利息、税项、折旧及摊销前的利润)',
746
+ 'GeneralAndAdministrativeExpense(一般管理费用)',
747
+ 'GrossProfit(营业毛利)',
748
+ 'ImpairmentOfCapitalAssets(资本资产减值)',
749
+ 'InterestExpense(利息费用)',
750
+ 'InterestExpenseNonOperating(非经营性利息费用)',
751
+ 'InterestIncome(利息收益)',
752
+ 'InterestIncomeNonOperating(非经营性利息收入)',
753
+ 'MinorityInterests(少数股东损益)',
754
+ 'NetIncome(净利润)',
755
+ 'NetIncomeCommonStockholders(归属于普通股股东的净利润,用于计算EPS和PE)',
756
+ 'NetIncomeContinuousOperations(扣非后净利润)',
757
+ 'NetIncomeFromContinuingAndDiscontinuedOperation(来自持续经营和停止经营业务的净收入)',
758
+ 'NetIncomeFromContinuingOperationNetMinorityInterest(不归属少数股东的扣非后净利润?)',
759
+ 'NetIncomeIncludingNoncontrollingInterests(包括非控股权的净收入?)',
760
+ 'NetInterestIncome(净利息收入)',
761
+ 'NetNonOperatingInterestIncomeExpense(?)',
762
+ 'NormalizedEBITDA(调整后EBITDA?)',
763
+ 'NormalizedIncome(调整后利润?)',
764
+ 'OperatingExpense(营业费用)',
765
+ 'OperatingIncome(营业利润)',
766
+ 'OperatingRevenue(营业收入)',
767
+ 'OtherNonOperatingIncomeExpenses(其他营业外收入支出?)',
768
+ 'OtherOperatingExpenses(其它营业费用)',
769
+ 'OtherSpecialCharges(其他特殊费用)',
770
+ 'OtherunderPreferredStockDividend(优先股股利下的其他项目)',
771
+ 'PretaxIncome(税前利润)',
772
+ 'ReconciledCostOfRevenue(对账后的经营收入成本?)',
773
+ 'ReconciledDepreciation(对账后的折旧)',
774
+ 'RentAndLandingFees(租金及土地费用?)',
775
+ 'RentExpenseSupplemental(补充租金费用?)',
776
+ 'ResearchAndDevelopment(研发费用)',
777
+ 'SellingAndMarketingExpense(销售和市场营销费用)',
778
+ 'SellingGeneralAndAdministration(销售及一般管理费用)',
779
+ 'SpecialIncomeCharges(特殊收入的手续费)',
780
+ 'TaxEffectOfUnusualItems(非常项目的税收影响)',
781
+ 'TaxProvision(税金?)',
782
+ 'TaxRateForCalcs(计算用的税率)',
783
+ 'TotalExpenses(总费用)',
784
+ 'TotalOperatingIncomeAsReported(报告的总营业利润)',
785
+ 'TotalOtherFinanceCost(其他财务成本合计)',
786
+ 'TotalRevenue(总收入)',
787
+ 'TotalUnusualItems(非经常性项目总计)',
788
+ 'TotalUnusualItemsExcludingGoodwill(不包括商誉的非经常项目合计)',
789
+ 'WriteOff(冲销?)',
790
+
791
+ 'BeginningCashPosition(期初现金头寸)',
792
+ 'CapitalExpenditure(资本支出)',
793
+ 'CashDividendsPaid(现金股利支付)',
794
+ 'ChangeInCashSupplementalAsReported(现金补充变更报告?)',
795
+ 'ChangeInInventory(存货变化)',
796
+ 'ChangeInWorkingCapital(营运资本的变动额)',
797
+ 'DepreciationAndAmortization(折旧摊销)',
798
+ 'EndCashPosition(期末现金头寸)',
799
+ 'FreeCashFlow(自由现金流)',
800
+ 'InvestingCashFlow(投资现金流)',
801
+ 'NetOtherFinancingCharges(其他融资费用净额)',
802
+ 'NetOtherInvestingChanges(其他投资变动净额)',
803
+ 'OperatingCashFlow(营运现金流)',
804
+ 'OtherNonCashItems(其他非现金项目)'
805
+ ]
806
+ """
807
+
808
+ #==============================================================================
809
if __name__ == '__main__':
    # Manual scratch test for fs_entry_begin; it only runs when the module is
    # executed directly, never on import.
    # NOTE(review): get_financial_statements is not visible in this chunk --
    # presumably it downloads the Yahoo statements for the ticker; confirm.
    # NOTE(review): fs_entry_begin appears to be defined after this guard, so
    # this looks intended for interactive, cell-by-cell execution -- confirm.
    fsdf=get_financial_statements('601398.SS')
    # Not used by the call below, which passes the same literal directly.
    account_entry='TotalAssets'

    fsdf1=fs_entry_begin(fsdf,account_entry='TotalAssets',suffix='_begin')
814
+
815
def fs_entry_begin(fsdf,account_entry='TotalAssets',suffix='_begin'):
    """
    Add an opening-balance column for one account entry (Yahoo statements only).

    For every row, the opening value is the value of ``account_entry`` on the
    previous year's annual-report date. That date is built from the row's
    calendar year minus one plus the month-day of the first annual ('12M') row.

    Parameters:
        fsdf: DataFrame with columns 'asOfDate', 'periodType' and
            ``account_entry``. It is modified in place (one column is added).
        account_entry: name of the column whose opening value is wanted.
        suffix: text appended to ``account_entry`` to name the new column.

    Returns:
        The same ``fsdf`` object, with column ``account_entry+suffix`` appended.
        Rows with no record dated exactly at the computed opening date get NaN.
        If the frame has no annual ('12M') row, a warning is printed and the
        whole new column is NaN.

    Raises:
        KeyError: if 'asOfDate', 'periodType' or ``account_entry`` is missing.
    """
    import pandas as pd
    import numpy as np

    begin_col=account_entry+suffix

    # Work on local Series only, so no scratch columns touch the caller's frame
    # (none to clobber, none left behind if something raises midway).
    as_of=fsdf['asOfDate'].apply(pd.to_datetime)
    entry_values=fsdf[account_entry]

    # Month-day of the annual report, taken from the first annual row.
    # .values makes the mask positional; statement frames often repeat the
    # ticker as index label on every row.
    annual_dates=as_of[(fsdf['periodType']=='12M').values].dropna()
    if len(annual_dates)==0:
        print("  #Warning(fs_entry_begin): no annual report (12M) found, cannot set",begin_col)
        fsdf[begin_col]=np.nan
        return fsdf
    ar_mm_dd=annual_dates.iloc[0].strftime("%m-%d")

    def _prior_year_end(d):
        # errors='coerce' turns an impossible date (02-29 in a non-leap year)
        # into NaT instead of raising.
        if pd.isna(d):
            return pd.NaT
        return pd.to_datetime(str(d.year-1)+'-'+ar_mm_dd,errors='coerce')

    begin_dates=[_prior_year_end(d) for d in as_of]

    # One pass to index the first value seen on each date, replacing a
    # per-row filter of the whole frame.
    first_value={}
    for d,v in zip(as_of,entry_values):
        if pd.notna(d):
            first_value.setdefault(d,v)

    fsdf[begin_col]=[first_value.get(b,np.nan) for b in begin_dates]

    return fsdf
835
+
836
+ #==============================================================================
837
+ #==============================================================================
838
+ #==============================================================================
839
+ #==============================================================================
siat/stock.py CHANGED
@@ -2654,10 +2654,10 @@ def stock_dividend(ticker,start="L3Y",end="today",facecolor='whitesmoke',fontcol
2654
2654
  try:
2655
2655
  div=stock.dividends
2656
2656
  except:
2657
- print(" #Error(stock_dividend): no div info found for",ticker)
2657
+ print(" #Error(stock_dividend): no dividend information found for",ticker)
2658
2658
  return None
2659
2659
  if len(div)==0:
2660
- print(" #Warning(stock_dividend): no div info found for",ticker)
2660
+ print(" #Warning(stock_dividend): no dividend information found for",ticker)
2661
2661
  return None
2662
2662
 
2663
2663
  # 去掉时区信息,避免合并中的日期时区冲突问题
@@ -2795,10 +2795,10 @@ def stock_split(ticker,start="L10Y",end="today",facecolor='whitesmoke',fontcolor
2795
2795
  try:
2796
2796
  div=stock.splits
2797
2797
  except:
2798
- print(" #Error(stock_split): no split info found for",ticker)
2798
+ print(" #Error(stock_split): no split information found for",ticker)
2799
2799
  return None
2800
2800
  if len(div)==0:
2801
- print(" #Warning(stock_split): no split info found for",ticker)
2801
+ print(" #Warning(stock_split): no split information found for",ticker)
2802
2802
  return None
2803
2803
 
2804
2804
  # 去掉时区信息,避免合并中的日期时区冲突问题
@@ -2810,7 +2810,7 @@ def stock_split(ticker,start="L10Y",end="today",facecolor='whitesmoke',fontcolor
2810
2810
  div1=div[div.index >= startdt]
2811
2811
  div2=div1[div1.index <= enddt]
2812
2812
  if len(div2)==0:
2813
- print(" #Warning(stock_split): no split info in period",fromdate,todate)
2813
+ print(" #Warning(stock_split): no split information in period",fromdate,todate)
2814
2814
  return None
2815
2815
 
2816
2816
  #对齐打印
siat/valuation.py CHANGED
@@ -1177,9 +1177,10 @@ def security_valuation(tickers,indicators,start,end, \
1177
1177
 
1178
1178
  #==============================================================================
1179
1179
  if __name__=='__main__':
1180
- df=security_trend(baijiu_stocks,indicator='PE',start='MRY',graph=False)
1180
+ bank_big=find_peers_china('国有大型银行Ⅱ',top=25)
1181
+ df=security_trend(bank_big,indicator='PE',start='MRY',graph=False)
1181
1182
  indicator='PE'
1182
- base=''
1183
+ base='601398.SS'
1183
1184
 
1184
1185
 
1185
1186
  def print_valuation(df,indicator='PE',base='',facecolor='whitesmoke'):
@@ -1189,14 +1190,15 @@ def print_valuation(df,indicator='PE',base='',facecolor='whitesmoke'):
1189
1190
  try:
1190
1191
  df1=df[indicator]
1191
1192
  except:
1192
- print(" #Warning: current dataframe does not support indicator",indicator)
1193
+ print(f" #Warning(print_valuation): unsupported indicator {indicator} in current dataframe")
1193
1194
  return
1194
1195
 
1195
1196
  collist=list(df1)
1196
1197
  base=base.upper()
1198
+ base=ticker_name(base)
1197
1199
  if not (base in collist):
1198
1200
  """
1199
- print(" #Warning: invalid item",base,"for current dataframe")
1201
+ print(" #Warning(print_valuation): invalid item",base,"for current dataframe")
1200
1202
  print(" Valid items in current dataframe:\n",collist)
1201
1203
  return
1202
1204
  """
@@ -1256,7 +1258,8 @@ def print_valuation(df,indicator='PE',base='',facecolor='whitesmoke'):
1256
1258
  #df4=df3[['序号','证券名称',col_mean,col_mean_rel,'均值对比',col_latest_date,col_latest_rel,'对比@'+col_latest_date]]
1257
1259
  df4=df3[['证券名称',col_mean,'均值排名',col_latest_date,'排名@'+col_latest_date,col_mean_rel,col_latest_rel,'均值对比','对比@'+col_latest_date]]
1258
1260
 
1259
- titletxt="*** 估值对比:"+indicator+",降序排列"
1261
+ #titletxt="估值对比:"+ectranslate(indicator)+",降序排列"
1262
+ titletxt="估值对比:{0}({1}),降序排列".format(ectranslate(indicator),indicator)
1260
1263
  """
1261
1264
  print("\n",titletxt,'\n')
1262
1265
  alignlist=['left','right','center','right','center']+['right']*(len(list(df4))-5)
@@ -1273,16 +1276,17 @@ def print_valuation(df,indicator='PE',base='',facecolor='whitesmoke'):
1273
1276
  #设置列数值对齐
1274
1277
  dispf=dispt.set_properties(**{'text-align':'center'})
1275
1278
  #设置前景背景颜色
1276
- dispf2=dispf.set_properties(**{'background-color':facecolor,'color':fontcolor})
1279
+ #dispf2=dispf.set_properties(**{'background-color':facecolor,'color':fontcolor})
1280
+ dispf2=dispf.set_properties(**{'background-color':facecolor})
1277
1281
 
1278
1282
  from IPython.display import display
1279
1283
  display(dispf2)
1280
1284
 
1281
- print(" ")
1285
+ #print(" ")
1282
1286
  if diff > 0:
1283
1287
  print("【注】未列出"+str(diff)+"只估值为非正数的证券:"+str(diff_list))
1284
1288
  import datetime; todaydt = datetime.date.today()
1285
- footnote="*** 期间:"+col_start_date+"至"+col_latest_date+"。数据来源: baidu/stooq/funddb/swhysc,"+str(todaydt)
1289
+ footnote="估值期间:"+col_start_date+"至"+col_latest_date+",数据来源: baidu/stooq/funddb/swhysc,"+str(todaydt)
1286
1290
  print(footnote)
1287
1291
 
1288
1292
  return
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: siat
3
- Version: 3.7.7
3
+ Version: 3.7.8
4
4
  Summary: Securities Investment Analysis Tools (siat)
5
5
  Home-page: https://pypi.org/project/siat/
6
6
  Author: Prof. WANG Dehong, International Business School, Beijing Foreign Studies University
@@ -1,6 +1,6 @@
1
1
  siat/__init__ -20240701.py,sha256=gP5uajXnJesnH5SL0ZPwq_Qhv59AG1bs4qwZv26Fo2Y,2894
2
2
  siat/__init__.py,sha256=pIo4CV3lNPKIhitmhIh_6aAfZrmzQWGNDcEnvZ7GXoc,3216
3
- siat/allin.py,sha256=JLuxVmxtlOTDelnfulK6rPoFTIhzTNe5_GShXCiKGZY,2904
3
+ siat/allin.py,sha256=x1QC29PUBUYiA6IAbQKbRvtxIEUOBx8dy5k7zh1ABT4,2970
4
4
  siat/alpha_vantage_test.py,sha256=tKr-vmuFH3CZAqwmISz6jzjPHzV1JJl3sPfZdz8aTfM,747
5
5
  siat/assets_liquidity.py,sha256=o_UZdLs693uNWPEQB2OzxDH0mdWimOmq4qe_vx1pue0,28987
6
6
  siat/assets_liquidity_test.py,sha256=UWk6HIUlizU7LQZ890fGx8LwU1jMMrIZswg8cFUJWZ8,1285
@@ -18,7 +18,7 @@ siat/capm_beta.py,sha256=cxXdRVBQBllhbfz1LeTJAIWvyRYhW54nhtNUXv4HwS0,29063
18
18
  siat/capm_beta2.py,sha256=-ZYYp1HK7SkfTR3vBKZ0QVC4Q_tbST2O4MGbX_V77J0,32031
19
19
  siat/capm_beta_test.py,sha256=ImR0c5mc4hIl714XmHztdl7qg8v1E2lycKyiqnFj6qs,1745
20
20
  siat/cmat_commons.py,sha256=Nj9Kf0alywaztVoMVeVVL_EZk5jRERJy8R8kBw88_Tg,38116
21
- siat/common.py,sha256=08Gb287rbVIAFdLilHPg3DJHLCCIhM95rJ-zhuZpudc,164861
21
+ siat/common.py,sha256=crRasPR7t5G1t95hm9JWmIob9d-iYASA4GEW5bKVhWw,173203
22
22
  siat/compare_cross.py,sha256=3iP9TH2h3w27F2ARZc7FjKcErYCzWRc-TPiymOyoVtw,24171
23
23
  siat/compare_cross_test.py,sha256=xra5XYmQGEtfIZL2h-GssdH2hLdFIhG3eoCrkDrL3gY,3473
24
24
  siat/concepts_iwencai.py,sha256=m1YEDtECRT6FqtzlKm91pt2I9d3Z_XoP59BtWdRdu8I,3061
@@ -36,6 +36,7 @@ siat/event_study.py,sha256=Q_AdnJzxxL6udCjn5LP6rdhOngsDRWPu5udFkK1CJZw,32223
36
36
  siat/exchange_bond_china.pickle,sha256=zDqdPrFacQ0nqjP_SuF6Yy87EgijIRsFvFroW7FAYYY,1265092
37
37
  siat/fama_french.py,sha256=aUTC-67t_CEPbLk4u79woW_zfZ7OCP6Fo4z5EdWCSkQ,48051
38
38
  siat/fama_french_test.py,sha256=M4O23lBKsJxhWHRluwCb3l7HSEn3OFTjzGMpehcevRg,4678
39
+ siat/fin_stmt2_yahoo.py,sha256=q4oNv-Wc30Kyuq_lRB0ZMINd33pusxQ3H2BecoFATqo,34297
39
40
  siat/financial_base.py,sha256=5u298_1OSlgLnDmhXxqvo4WgMM0JKSa_4jBYF-Ilx38,41097
40
41
  siat/financial_statements.py,sha256=xx0SMpFqAMKm6cj8uYeG2RpJE6G-RoJ3NWa33UyaVMk,25414
41
42
  siat/financial_statements_test.py,sha256=FLhx8JD-tVVWSBGux6AMz1jioXX4U4bp9DmgFHYXb_w,716
@@ -106,7 +107,7 @@ siat/security_trend2-20240620.py,sha256=QVnEcb7AyVbO77jVqfFsJffGXrX8pgJ9xCfoAKmW
106
107
  siat/security_trend2.py,sha256=mamJtFAbXC1orGgMEmp0taPk-yUqWm-jdGf64bjhn2Q,29756
107
108
  siat/setup.py,sha256=up65rQGLmTBkhtaMLowjoQXYmIsnycnm4g1SYmeQS6o,1335
108
109
  siat/shenwan index history test.py,sha256=JCVAzOSEldHalhSFa3pqD8JI_8_djPMQOxpkuYU-Esg,1418
109
- siat/stock.py,sha256=Eq8dVs9CxT1vS1BEsFQ4xCff3aXBucbT5OPW-WidNcA,158613
110
+ siat/stock.py,sha256=AO_ntNoinjXpzzs4eA1iMlsqGFoN1KvgHV6Z_ECVy-M,158658
110
111
  siat/stock_advice_linear.py,sha256=-twT7IGP-NEplkL1WPSACcNJjggRB2j4mlAQCkzOAuo,31655
111
112
  siat/stock_base.py,sha256=uISvbRyOGy8p9QREA96CVydgflBkn5L3OXOGKl8oanc,1312
112
113
  siat/stock_china.py,sha256=vm_BslG0gJm4R0-O5bNtENkMMfDP-u1TYKCKzLH9Nkk,93460
@@ -136,13 +137,13 @@ siat/translate.py,sha256=aAWk9museEsdxDstXjEy_SXiEzvKD_6fDi0W4TrBiwI,241364
136
137
  siat/translate_20240606.py,sha256=63IyHWEU3Uz9mjwyuAX3fqY4nUMdwh0ICQAgmgPXP7Y,215121
137
138
  siat/translate_241003_keep.py,sha256=un7Fqe1v35MXsja5exZgjmLzrZtt66NARZIGlyFuGGU,218747
138
139
  siat/universal_test.py,sha256=CDAOffW1Rvs-TcNN5giWVvHMlch1w4dp-w5SIV9jXL0,3936
139
- siat/valuation.py,sha256=o5FsEBkYGCgyEg2m9oF3_KdTzUxPIsEeJCYtsTDZ8OE,50514
140
+ siat/valuation.py,sha256=DF-THR8AOPFtbMG58g9coqrfnzeBIhX3PW7ZhU6WbEo,50821
140
141
  siat/valuation_china.py,sha256=CVp1IwIsF3Om0J29RGkyxZLt4n9Ug-ua_RKhLwL9fUQ,69624
141
142
  siat/valuation_market_china_test.py,sha256=gbJ0ioauuo4koTPH6WKUkqcXiQPafnbhU5eKJ6lpdLA,1571
142
143
  siat/var_model_validation.py,sha256=R0caWnuZarrRg9939hxh3vJIIpIyPfvelYmzFNZtPbo,14910
143
144
  siat/yf_name.py,sha256=r0Q67cSMMlfebEkI9h9pdGlJCooEq7hw_3M5IUs4cSI,20081
144
- siat-3.7.7.dist-info/LICENSE,sha256=NTEMMROY9_4U1szoKC3N2BLHcDd_o5uTgqdVH8tbApw,1071
145
- siat-3.7.7.dist-info/METADATA,sha256=4VE1VSyUItSI2OgG2CW5PSJ4PS7C9xZRhSt8TizioV8,8009
146
- siat-3.7.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
147
- siat-3.7.7.dist-info/top_level.txt,sha256=r1cVyL7AIKqeAmEJjNR8FMT20OmEzufDstC2gv3NvEY,5
148
- siat-3.7.7.dist-info/RECORD,,
145
+ siat-3.7.8.dist-info/LICENSE,sha256=NTEMMROY9_4U1szoKC3N2BLHcDd_o5uTgqdVH8tbApw,1071
146
+ siat-3.7.8.dist-info/METADATA,sha256=eDkDeMALXeeUIhn3YcgdI7vzLy2P4qCAR8ErzLuv5p4,8009
147
+ siat-3.7.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
148
+ siat-3.7.8.dist-info/top_level.txt,sha256=r1cVyL7AIKqeAmEJjNR8FMT20OmEzufDstC2gv3NvEY,5
149
+ siat-3.7.8.dist-info/RECORD,,
File without changes
File without changes