siat 3.4.32__py3-none-any.whl → 3.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/common.py +115 -3
- siat/grafix.py +15 -2
- siat/risk_adjusted_return2.py +50 -40
- siat/sector_china.py +432 -134
- siat/security_price2.py +1 -0
- siat/security_prices.py +44 -11
- siat/security_trend2.py +16 -7
- siat/stock.py +68 -32
- siat/stock_technical.py +6 -6
- siat/translate.py +411 -161
- siat/translate_241003_keep.py +4300 -0
- {siat-3.4.32.dist-info → siat-3.5.1.dist-info}/METADATA +1 -1
- {siat-3.4.32.dist-info → siat-3.5.1.dist-info}/RECORD +16 -15
- {siat-3.4.32.dist-info → siat-3.5.1.dist-info}/LICENSE +0 -0
- {siat-3.4.32.dist-info → siat-3.5.1.dist-info}/WHEEL +0 -0
- {siat-3.4.32.dist-info → siat-3.5.1.dist-info}/top_level.txt +0 -0
siat/sector_china.py
CHANGED
@@ -198,6 +198,7 @@ def sector_code_china(sector_name):
|
|
198
198
|
|
199
199
|
#if found > 0: print(" ")
|
200
200
|
if indicator == "行业": indicator = "证监会行业"
|
201
|
+
if indicator == "概念": indicator = "新浪概念"
|
201
202
|
|
202
203
|
if len(sector_code)>0:
|
203
204
|
"""
|
@@ -575,13 +576,19 @@ if __name__=='__main__':
|
|
575
576
|
sector="new_dlhy"
|
576
577
|
sector="yysw"
|
577
578
|
sector="xyz"
|
579
|
+
|
580
|
+
ticker='000661.SZ'; sector="gn_swym"
|
578
581
|
|
579
|
-
def sector_position_sina(ticker,sector="new_dlhy"):
|
582
|
+
def sector_position_sina(ticker,sector="new_dlhy",return_result=False):
|
580
583
|
"""
|
581
584
|
功能:套壳sector_position_china
|
582
585
|
"""
|
583
586
|
df=sector_position_china(ticker=ticker,sector=sector)
|
584
|
-
|
587
|
+
|
588
|
+
if return_result:
|
589
|
+
return df
|
590
|
+
else:
|
591
|
+
return
|
585
592
|
|
586
593
|
def sector_position_china(ticker,sector="new_dlhy"):
|
587
594
|
"""
|
@@ -614,19 +621,21 @@ def sector_position_china(ticker,sector="new_dlhy"):
|
|
614
621
|
break
|
615
622
|
except:
|
616
623
|
continue
|
624
|
+
|
617
625
|
#未找到板块代码
|
618
626
|
if sector_name == '':
|
619
|
-
print(" #
|
627
|
+
print(" #Warning(sector_position_china): unsupported sector code",sector)
|
620
628
|
return None
|
621
629
|
|
622
630
|
#板块成份股
|
623
631
|
try:
|
624
|
-
|
632
|
+
#注意:启明星行业分类没有成份股明细
|
625
633
|
df = ak.stock_sector_detail(sector=sector)
|
626
634
|
except:
|
627
|
-
print(" #
|
628
|
-
|
629
|
-
|
635
|
+
print(" #Warning(sector_position_china): sector detail not available for",sector,'by',indicator)
|
636
|
+
if indicator !="启明星行业":
|
637
|
+
print(" Possible reason: data source is self-updating.")
|
638
|
+
print(" Solution: have a breath of fresh air and try later.")
|
630
639
|
return None
|
631
640
|
|
632
641
|
#清洗原始数据: #可能同时含有数值和字符串,强制转换成数值
|
@@ -641,32 +650,40 @@ def sector_position_china(ticker,sector="new_dlhy"):
|
|
641
650
|
#检查股票代码是否存在
|
642
651
|
sdf=df[df['code']==ticker1]
|
643
652
|
if len(sdf) == 0:
|
644
|
-
print(" #
|
645
|
-
print("
|
653
|
+
print(" #Warning(sector_position_china): retrieving",ticker,"failed in sector",sector,sector_name)
|
654
|
+
print(" Solution: make sure stock code correct, try later if network is slow")
|
646
655
|
return None
|
647
656
|
sname=list(sdf['name'])[0]
|
648
657
|
|
649
658
|
#确定比较范围
|
650
659
|
complist=['changepercent','turnoverratio','settlement','per','pb','nmc','mktcap']
|
660
|
+
vminlist=['settlement','per','pb','nmc','mktcap'] #板块最小值若为零需要标记的列
|
651
661
|
compnames=['涨跌幅%','换手率%','收盘价(元)','市盈率','市净率','流通市值(亿元)','总市值(亿元)']
|
652
|
-
compdf=pd.DataFrame(columns=['指标名称','指标数值','板块分位数%','板块中位数','板块最小值','板块最大值'])
|
662
|
+
compdf=pd.DataFrame(columns=['指标名称','指标数值','板块排名','板块分位数%','板块中位数','板块最小值','板块最大值'])
|
653
663
|
|
654
664
|
from scipy.stats import percentileofscore
|
655
665
|
|
656
666
|
for c in complist:
|
657
667
|
v=list(sdf[c])[0]
|
658
|
-
vlist=list(set(list(df[c])))
|
659
|
-
vlist
|
668
|
+
#vlist=list(set(list(df[c])))
|
669
|
+
vlist=list(df[c])
|
670
|
+
vlist.sort() #升序
|
660
671
|
vmin=round(min(vlist),2)
|
672
|
+
if vmin==0.00 and c in vminlist:
|
673
|
+
vmin='--'
|
674
|
+
|
661
675
|
vmax=round(max(vlist),2)
|
662
676
|
vmedian=round(np.median(vlist),2)
|
663
677
|
|
664
678
|
pos=vlist.index(v)
|
665
679
|
#pct=round((pos+1)/len(vlist)*100,2)
|
680
|
+
#sector_rank=str(len(vlist)-pos)+'/'+str(len(vlist))
|
681
|
+
sector_rank=str(len(vlist)-pos)
|
682
|
+
|
666
683
|
pct=percentileofscore(vlist,v)
|
667
684
|
|
668
685
|
s=pd.Series({'指标名称':compnames[complist.index(c)], \
|
669
|
-
'指标数值':v,'板块分位数%':pct,'板块中位数':vmedian, \
|
686
|
+
'指标数值':v,'板块排名':sector_rank,'板块分位数%':pct,'板块中位数':vmedian, \
|
670
687
|
'板块最小值':vmin,'板块最大值':vmax})
|
671
688
|
try:
|
672
689
|
compdf=compdf.append(s,ignore_index=True)
|
@@ -695,13 +712,13 @@ def sector_position_china(ticker,sector="new_dlhy"):
|
|
695
712
|
"""
|
696
713
|
if indicator=="行业": indicator="证监会行业"
|
697
714
|
|
698
|
-
titletxt="上市公司地位分析:"+sname+","+sector_name+"行业/板块("+indicator+"分类)"
|
715
|
+
titletxt="\n上市公司地位分析:"+sname+","+sector_name+"行业/板块("+indicator+"分类)"
|
699
716
|
import datetime; stoday = datetime.date.today()
|
700
717
|
footnote1=""
|
701
718
|
footnote2="成分股总数:"+str(len(df))+",数据来源:新浪财经,"+str(stoday)+"(截至昨日)"
|
702
719
|
footnote=footnote1+footnote2
|
703
720
|
|
704
|
-
print("") #空一行
|
721
|
+
#print("") #空一行
|
705
722
|
df_display_CSS(compdf,titletxt=titletxt,footnote=footnote,facecolor='papayawhip',decimals=2, \
|
706
723
|
first_col_align='left',second_col_align='right', \
|
707
724
|
last_col_align='right',other_col_align='right', \
|
@@ -714,7 +731,7 @@ def sector_position_china(ticker,sector="new_dlhy"):
|
|
714
731
|
|
715
732
|
#==============================================================================
|
716
733
|
|
717
|
-
def invest_concept_china(num=10):
|
734
|
+
def invest_concept_china(num=10,max_sleep=30):
|
718
735
|
"""
|
719
736
|
废弃!
|
720
737
|
功能:汇总投资概念股票名单,排行
|
@@ -738,7 +755,7 @@ def invest_concept_china(num=10):
|
|
738
755
|
|
739
756
|
import pandas as pd
|
740
757
|
totaldf=pd.DataFrame()
|
741
|
-
import time
|
758
|
+
import time; import random
|
742
759
|
i=0
|
743
760
|
#新浪财经有反爬虫,这个循环做不下去
|
744
761
|
for c in clist:
|
@@ -752,7 +769,10 @@ def invest_concept_china(num=10):
|
|
752
769
|
print(', failed:-(')
|
753
770
|
#continue
|
754
771
|
#等待一会儿,避免被禁访问
|
755
|
-
time.sleep(
|
772
|
+
#time.sleep(max_sleep)
|
773
|
+
random_int=random.randint(1,max_sleep)
|
774
|
+
time.sleep(random_int)
|
775
|
+
|
756
776
|
i=i+1
|
757
777
|
if i % 20 == 0:
|
758
778
|
print(int(i/cnum*100),'\b%',end=' ')
|
@@ -787,7 +807,7 @@ def invest_concept_china(num=10):
|
|
787
807
|
#==============================================================================
|
788
808
|
def industry_sw_list_all():
|
789
809
|
"""
|
790
|
-
功能:输出申万指数所有代码df
|
810
|
+
功能:输出申万指数所有代码df。动态,每次重新获取,自动更新!
|
791
811
|
输入:
|
792
812
|
输出:df,包括市场表征指数F,一级行业指数I,二级行业T,风格指数S,三级行业3
|
793
813
|
"""
|
@@ -799,6 +819,7 @@ def industry_sw_list_all():
|
|
799
819
|
industry=pd.DataFrame()
|
800
820
|
for s in symboltypes:
|
801
821
|
try:
|
822
|
+
#目前有问题!
|
802
823
|
dft = ak.index_realtime_sw(symbol=s)
|
803
824
|
except: continue
|
804
825
|
|
@@ -927,40 +948,67 @@ if __name__=='__main__':
|
|
927
948
|
iname='食品饮料'
|
928
949
|
iname='银行'
|
929
950
|
iname='汽车'
|
951
|
+
iname='高价股指数'
|
952
|
+
iname='申万A指'
|
953
|
+
iname='大类风格-医药医疗'
|
954
|
+
|
930
955
|
numberPerLine=5
|
931
956
|
colalign='right'
|
932
957
|
|
933
958
|
print_industry_component_sw(iname,numberPerLine=5,colalign='right')
|
934
959
|
|
935
|
-
def print_industry_component_sw(iname,numberPerLine=5,colalign='left'
|
960
|
+
def print_industry_component_sw(iname,numberPerLine=5,colalign='left', \
|
961
|
+
printout=True,return_result=False):
|
936
962
|
"""
|
937
963
|
打印申万行业的成分股,名称(代码)
|
938
964
|
"""
|
939
965
|
try:
|
940
966
|
icode=industry_sw_code(iname)
|
941
967
|
except:
|
942
|
-
print(" #Warning(print_industry_component_sw):
|
943
|
-
|
968
|
+
print(" #Warning(print_industry_component_sw): failed to find index name for",iname)
|
969
|
+
if return_result:
|
970
|
+
return []
|
971
|
+
else:
|
972
|
+
return
|
973
|
+
|
974
|
+
if icode=='':
|
975
|
+
print(" #Warning(print_industry_component_sw): relevent index code not found for",iname)
|
976
|
+
if return_result:
|
977
|
+
return []
|
978
|
+
else:
|
979
|
+
return
|
980
|
+
|
981
|
+
clist,cdf=industry_stock_sw(icode,top=1000)
|
982
|
+
if clist is None:
|
983
|
+
if return_result:
|
984
|
+
print(" #Warning(print_industry_component_sw): no component stock found for",iname)
|
985
|
+
return []
|
986
|
+
else:
|
987
|
+
return
|
944
988
|
|
945
|
-
clist,cdf=industry_stock_sw(icode,top=1000)
|
946
|
-
|
947
989
|
#cdf['icode']=cdf['证券代码'].apply(lambda x: x+'.SS' if x[:1] in ['6'] else (x+'.SZ' if x[:1] in ['0','3'] else x+'.BJ' ))
|
948
990
|
cdf['icode']=cdf['证券代码']
|
949
991
|
|
950
992
|
# 删除'证券名称'为None的行
|
951
993
|
cdf=cdf.mask(cdf.eq('None')).dropna()
|
952
|
-
cdf['name_code']=cdf.apply(lambda x: x['证券名称']+'('+x['icode']+')',axis=1)
|
953
994
|
|
954
|
-
|
955
|
-
|
995
|
+
# 合成证券名称与代码
|
996
|
+
cdf['name_code']=cdf.apply(lambda x: x['证券名称']+'('+x['icode']+')',axis=1)
|
956
997
|
ilist=list(cdf['name_code'])
|
957
998
|
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
printInLine_md(ilist,numberPerLine=numberPerLine,colalign=colalign)
|
999
|
+
if printout:
|
1000
|
+
#标题
|
1001
|
+
import datetime as dt; stoday=dt.date.today()
|
962
1002
|
|
963
|
-
|
1003
|
+
titletxt=iname+"("+icode+")行业/板块成分股:计"+str(len(ilist))+'只,按行业指数权重降序排列,'+str(stoday)
|
1004
|
+
print("\n"+titletxt,end='')
|
1005
|
+
#表格
|
1006
|
+
printInLine_md(ilist,numberPerLine=numberPerLine,colalign=colalign)
|
1007
|
+
|
1008
|
+
if return_result:
|
1009
|
+
return ilist
|
1010
|
+
else:
|
1011
|
+
return
|
964
1012
|
|
965
1013
|
#==============================================================================
|
966
1014
|
if __name__=='__main__':
|
@@ -981,8 +1029,10 @@ def print_industry_component_sw2(icode,numberPerLine=5,colalign='left'):
|
|
981
1029
|
|
982
1030
|
clist,cdf=industry_stock_sw(icode,top=1000)
|
983
1031
|
if cdf is None:
|
984
|
-
print(" Error(print_industry_component_sw2):
|
985
|
-
print("
|
1032
|
+
print(" #Error(print_industry_component_sw2): failed to retrieve industry for",icode)
|
1033
|
+
print(" Solution: make sure the industry code correct")
|
1034
|
+
print(" If the code is correct, upgrade akshare, restart jupyter and try again")
|
1035
|
+
|
986
1036
|
return
|
987
1037
|
|
988
1038
|
#cdf['icode']=cdf['证券代码'].apply(lambda x: x+'.SS' if x[:1] in ['6'] else (x+'.SZ' if x[:1] in ['0','3'] else x+'.BJ' ))
|
@@ -1003,6 +1053,8 @@ def print_industry_component_sw2(icode,numberPerLine=5,colalign='left'):
|
|
1003
1053
|
|
1004
1054
|
#==============================================================================
|
1005
1055
|
if __name__=='__main__':
|
1056
|
+
iname='大类风格--医药医疗'
|
1057
|
+
|
1006
1058
|
industry_sw_code('光伏设备')
|
1007
1059
|
|
1008
1060
|
def industry_sw_code(iname):
|
@@ -1086,7 +1138,10 @@ def industry_ranking_sw(start,end,measure='Exp Ret%', \
|
|
1086
1138
|
"""
|
1087
1139
|
完整版,全流程
|
1088
1140
|
功能:模板,遍历某类申万指数,计算某项业绩指标,汇集排序
|
1089
|
-
itype:
|
1141
|
+
itype:
|
1142
|
+
股票类指数:F表征指数,n=1/2/3行业指数,S风格指数,B大类风格指数,C金创指数?
|
1143
|
+
基金类指数:J1/2/3基础一二三级,JF特色指数
|
1144
|
+
|
1090
1145
|
period="day"; choice of {"day", "week", "month"}
|
1091
1146
|
绘图:柱状图,可选
|
1092
1147
|
"""
|
@@ -1111,13 +1166,22 @@ def industry_ranking_sw(start,end,measure='Exp Ret%', \
|
|
1111
1166
|
fail_list=[]
|
1112
1167
|
for i in ilist:
|
1113
1168
|
|
1114
|
-
print(" Processing
|
1169
|
+
print(" Processing index",i,"\b, please wait ...")
|
1115
1170
|
#抓取指数价格,选取期间范围
|
1116
1171
|
try:
|
1117
1172
|
dft = ak.index_hist_sw(symbol=i,period="day")
|
1118
1173
|
except:
|
1119
|
-
|
1120
|
-
|
1174
|
+
try:
|
1175
|
+
dft = ak.index_hist_fund_sw(symbol=i,period="day")
|
1176
|
+
dft['代码']=i
|
1177
|
+
dft['收盘']=dft['收盘指数']
|
1178
|
+
dft['开盘']=dft['收盘指数']
|
1179
|
+
dft['最高']=dft['收盘指数']
|
1180
|
+
dft['最低']=dft['收盘指数']
|
1181
|
+
dft['成交量']=0; dft['成交额']=0
|
1182
|
+
except:
|
1183
|
+
fail_list=fail_list+[i]
|
1184
|
+
continue
|
1121
1185
|
|
1122
1186
|
dft['ticker']=dft['代码']
|
1123
1187
|
dft['date']=dft['日期'].apply(lambda x: pd.to_datetime(x))
|
@@ -1157,10 +1221,10 @@ def industry_ranking_sw(start,end,measure='Exp Ret%', \
|
|
1157
1221
|
df['name']=df['ticker'].apply(lambda x: industry_sw_name(x))
|
1158
1222
|
df.set_index('name',inplace=True)
|
1159
1223
|
colname='value'
|
1160
|
-
titletxt="
|
1224
|
+
titletxt="行业/指数分析:业绩排名"
|
1161
1225
|
import datetime; today=datetime.date.today()
|
1162
1226
|
footnote0=ectranslate(measure)+' ==>\n'
|
1163
|
-
footnote1='
|
1227
|
+
footnote1='申万行业/指数分类,观察期:'+start+'至'+iend+'\n'
|
1164
1228
|
footnote2="数据来源: 申万宏源, "+str(today)
|
1165
1229
|
footnote=footnote0+footnote1+footnote2
|
1166
1230
|
|
@@ -1168,11 +1232,12 @@ def industry_ranking_sw(start,end,measure='Exp Ret%', \
|
|
1168
1232
|
#plot_barh2(df,colname,titletxt,footnote)
|
1169
1233
|
|
1170
1234
|
if len(fail_list) > 0:
|
1171
|
-
print(" Unable to retrieve",len(fail_list),"industry(ies) as follows:")
|
1235
|
+
print(" Unable to retrieve",len(fail_list),"industry(ies) as follows:",end='')
|
1172
1236
|
if len(fail_list) >= 10:
|
1173
1237
|
printInLine_md(fail_list,numberPerLine=10,colalign='left',font_size='16px')
|
1174
1238
|
else:
|
1175
1239
|
printInLine_md(fail_list,numberPerLine=len(fail_list),colalign='left',font_size='16px')
|
1240
|
+
print('') #空一行
|
1176
1241
|
|
1177
1242
|
return df
|
1178
1243
|
|
@@ -1225,8 +1290,17 @@ def industry_ranking_sw2(industrylist,start,end,measure='Exp Ret%', \
|
|
1225
1290
|
try:
|
1226
1291
|
dft = ak.index_hist_sw(symbol=i,period="day")
|
1227
1292
|
except:
|
1228
|
-
|
1229
|
-
|
1293
|
+
try:
|
1294
|
+
dft = ak.index_hist_fund_sw(symbol=i,period="day")
|
1295
|
+
dft['代码']=i
|
1296
|
+
dft['收盘']=dft['收盘指数']
|
1297
|
+
dft['开盘']=dft['收盘指数']
|
1298
|
+
dft['最高']=dft['收盘指数']
|
1299
|
+
dft['最低']=dft['收盘指数']
|
1300
|
+
dft['成交量']=0; dft['成交额']=0
|
1301
|
+
except:
|
1302
|
+
print(" #Warning(industry_ranking_sw2): index not found for",i)
|
1303
|
+
continue
|
1230
1304
|
|
1231
1305
|
dft['ticker']=dft['代码']
|
1232
1306
|
dft['date']=dft['日期'].apply(lambda x: pd.to_datetime(x))
|
@@ -1269,10 +1343,10 @@ def industry_ranking_sw2(industrylist,start,end,measure='Exp Ret%', \
|
|
1269
1343
|
df.dropna(inplace=True)
|
1270
1344
|
|
1271
1345
|
colname='value'
|
1272
|
-
titletxt="
|
1346
|
+
titletxt="行业/指数分析:业绩排名"
|
1273
1347
|
import datetime; today=datetime.date.today()
|
1274
1348
|
footnote0=ectranslate(measure)+' ==>\n'
|
1275
|
-
footnote1='
|
1349
|
+
footnote1='申万行业/指数分类,观察期:'+start+'至'+iend+'\n'
|
1276
1350
|
footnote2="数据来源: 申万宏源, "+str(today)
|
1277
1351
|
footnote=footnote0+footnote1+footnote2
|
1278
1352
|
|
@@ -1289,10 +1363,13 @@ if __name__=='__main__':
|
|
1289
1363
|
period="day"
|
1290
1364
|
industry_list='all'
|
1291
1365
|
|
1292
|
-
def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=
|
1366
|
+
def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=30):
|
1293
1367
|
"""
|
1294
1368
|
功能:遍历某类申万指数,下载数据
|
1295
|
-
itype:
|
1369
|
+
itype:
|
1370
|
+
股票类指数:F表征指数,n=1/2/3行业指数,S风格指数,B大类风格指数,C金创指数?
|
1371
|
+
基金类指数:J1/2/3基础一二三级,JF特色指数
|
1372
|
+
|
1296
1373
|
period="day"; choice of {"day", "week", "month"}
|
1297
1374
|
industry_list: 允许选择部分行业
|
1298
1375
|
"""
|
@@ -1323,7 +1400,7 @@ def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=8):
|
|
1323
1400
|
import datetime; import random; import time
|
1324
1401
|
df=pd.DataFrame()
|
1325
1402
|
|
1326
|
-
print("
|
1403
|
+
print(" Searching industry data, it takes time, please wait ...")
|
1327
1404
|
num=len(ilist)
|
1328
1405
|
if num <= 10:
|
1329
1406
|
steps=5
|
@@ -1333,16 +1410,25 @@ def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=8):
|
|
1333
1410
|
total=len(ilist)
|
1334
1411
|
fail_list=[]
|
1335
1412
|
for i in ilist:
|
1336
|
-
print_progress_percent2(i,ilist,steps=5,leading_blanks=4)
|
1413
|
+
#print_progress_percent2(i,ilist,steps=5,leading_blanks=4)
|
1337
1414
|
#print(" Retrieving information for industry",i)
|
1338
1415
|
|
1339
1416
|
#抓取指数价格
|
1340
1417
|
try:
|
1341
1418
|
dft = ak.index_hist_sw(symbol=i,period="day")
|
1342
1419
|
except:
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1420
|
+
try:
|
1421
|
+
dft = ak.index_hist_fund_sw(symbol=i,period="day")
|
1422
|
+
dft['代码']=i
|
1423
|
+
dft['收盘']=dft['收盘指数']
|
1424
|
+
dft['开盘']=dft['收盘指数']
|
1425
|
+
dft['最高']=dft['收盘指数']
|
1426
|
+
dft['最低']=dft['收盘指数']
|
1427
|
+
dft['成交量']=0; dft['成交额']=0
|
1428
|
+
except:
|
1429
|
+
#print(" #Warning(get_industry_sw): unsupported industry",i)
|
1430
|
+
fail_list=fail_list+[i]
|
1431
|
+
continue
|
1346
1432
|
|
1347
1433
|
dft['ticker']=dft['代码']
|
1348
1434
|
dft['date']=dft['日期'].apply(lambda x: pd.to_datetime(x))
|
@@ -1365,16 +1451,14 @@ def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=8):
|
|
1365
1451
|
current=ilist.index(i)
|
1366
1452
|
#print_progress_percent(current,total,steps=steps,leading_blanks=2)
|
1367
1453
|
|
1454
|
+
print_progress_percent2(i,ilist,steps=steps,leading_blanks=4)
|
1368
1455
|
#生成随机数睡眠,试图防止被反爬虫,不知是否管用!
|
1369
1456
|
random_int=random.randint(1,max_sleep)
|
1370
1457
|
time.sleep(random_int)
|
1371
1458
|
|
1372
1459
|
#num=list(set(list(df['ticker'])))
|
1373
|
-
if len(
|
1374
|
-
print(" Successfully retrieved",len(df),"records in",len(ilist),"industries")
|
1375
|
-
else:
|
1376
|
-
print("\n Successfully retrieved",len(df),"records in",len(ilist),"industries")
|
1377
|
-
#print(" Successfully retrieved",len(df),"records in",num,"industries")
|
1460
|
+
if len(df)>0:
|
1461
|
+
print(" Successfully retrieved",len(df),"records in",len(ilist)-len(fail_list),"industries")
|
1378
1462
|
|
1379
1463
|
if len(fail_list) > 0:
|
1380
1464
|
print(" Failed to retrieve",len(fail_list),"industry(ies) as follows:")
|
@@ -1398,7 +1482,7 @@ if __name__=='__main__':
|
|
1398
1482
|
industry_list=['850831.SW','801785.SW','801737.SW','801194.SW',
|
1399
1483
|
'801784.SW','801783.SW','801782.SW']
|
1400
1484
|
|
1401
|
-
def get_industry_sw2(industry_list,period="day",max_sleep=
|
1485
|
+
def get_industry_sw2(industry_list,period="day",max_sleep=30):
|
1402
1486
|
"""
|
1403
1487
|
功能:遍历指定的申万指数列表,下载数据
|
1404
1488
|
period="day"; choice of {"day", "week", "month"}
|
@@ -1431,9 +1515,18 @@ def get_industry_sw2(industry_list,period="day",max_sleep=8):
|
|
1431
1515
|
try:
|
1432
1516
|
dft = ak.index_hist_sw(symbol=i,period="day")
|
1433
1517
|
except:
|
1434
|
-
|
1435
|
-
|
1436
|
-
|
1518
|
+
try:
|
1519
|
+
dft = ak.index_hist_fund_sw(symbol=i,period="day")
|
1520
|
+
dft['代码']=i
|
1521
|
+
dft['收盘']=dft['收盘指数']
|
1522
|
+
dft['开盘']=dft['收盘指数']
|
1523
|
+
dft['最高']=dft['收盘指数']
|
1524
|
+
dft['最低']=dft['收盘指数']
|
1525
|
+
dft['成交量']=0; dft['成交额']=0
|
1526
|
+
except:
|
1527
|
+
#print(" #Warning(get_industry_sw): unsupported industry",i)
|
1528
|
+
fail_list=fail_list+[i]
|
1529
|
+
continue
|
1437
1530
|
|
1438
1531
|
dft['ticker']=dft['代码']
|
1439
1532
|
dft['date']=dft['日期'].apply(lambda x: pd.to_datetime(x))
|
@@ -1461,11 +1554,9 @@ def get_industry_sw2(industry_list,period="day",max_sleep=8):
|
|
1461
1554
|
time.sleep(random_int)
|
1462
1555
|
|
1463
1556
|
#num=list(set(list(df['ticker'])))
|
1464
|
-
if len(
|
1465
|
-
print("\n Successfully retrieved",len(df),"records in",len(ilist),"industries")
|
1466
|
-
|
1467
|
-
print(" Successfully retrieved",len(df),"records in",len(ilist),"industries")
|
1468
|
-
#print(" Successfully retrieved",len(df),"records in",num,"industries")
|
1557
|
+
if len(df) > 0:
|
1558
|
+
print("\n Successfully retrieved",len(df),"records in",len(ilist)-len(fail_list),"industries")
|
1559
|
+
|
1469
1560
|
if len(fail_list) > 0:
|
1470
1561
|
print(" Failed to retrieve",len(fail_list),"industry(ies) as follows:")
|
1471
1562
|
if len(fail_list) >= 10:
|
@@ -1478,8 +1569,8 @@ def get_industry_sw2(industry_list,period="day",max_sleep=8):
|
|
1478
1569
|
|
1479
1570
|
#==============================================================================
|
1480
1571
|
if __name__=='__main__':
|
1481
|
-
start='
|
1482
|
-
end='
|
1572
|
+
start='2023-8-31'
|
1573
|
+
end='2024-9-30'
|
1483
1574
|
df=get_industry_sw('F')
|
1484
1575
|
|
1485
1576
|
def calc_industry_sw(df,start,end):
|
@@ -1491,7 +1582,7 @@ def calc_industry_sw(df,start,end):
|
|
1491
1582
|
#检查日期的合理性
|
1492
1583
|
result,start1,end1=check_period(start,end)
|
1493
1584
|
if not result:
|
1494
|
-
print(" #
|
1585
|
+
print(" #Warning(calc_industry_sw): invalid date period",start,end)
|
1495
1586
|
return None
|
1496
1587
|
|
1497
1588
|
#屏蔽函数内print信息输出的类
|
@@ -1649,10 +1740,10 @@ def rank_industry_sw(idf,measure='Exp Ret%',industries=[], \
|
|
1649
1740
|
gdf1=gdf
|
1650
1741
|
|
1651
1742
|
if printout or graph:
|
1652
|
-
titletxt="
|
1743
|
+
titletxt="行业板块/指数分析:最新业绩排名"
|
1653
1744
|
import datetime; today=datetime.date.today()
|
1654
1745
|
footnote0=ectranslate(measure)+' -->\n\n'
|
1655
|
-
footnote1='
|
1746
|
+
footnote1='申万行业/指数分类,'+iend+'快照'
|
1656
1747
|
footnote2='观察期:'+istart+'至'+iend+','
|
1657
1748
|
footnote3="数据来源: 申万宏源, "+str(today)+'统计'
|
1658
1749
|
footnote=footnote0+footnote1+'\n'+footnote2+footnote3
|
@@ -1661,7 +1752,7 @@ def rank_industry_sw(idf,measure='Exp Ret%',industries=[], \
|
|
1661
1752
|
gdf2=gdf1.sort_values(by=measure,ascending=False)
|
1662
1753
|
gdf2.reset_index(inplace=True)
|
1663
1754
|
gdf2.index=gdf2.index+1
|
1664
|
-
gdf2.columns=['
|
1755
|
+
gdf2.columns=['行业/指数名称','行业/指数代码',ectranslate(measure),'开始日期','结束日期']
|
1665
1756
|
"""
|
1666
1757
|
print("***",titletxt,'\n')
|
1667
1758
|
alignlist=['center']+['left']*(len(list(gdf2))-1)
|
@@ -1682,7 +1773,7 @@ def rank_industry_sw(idf,measure='Exp Ret%',industries=[], \
|
|
1682
1773
|
footnote=footnote0+footnote1+'\n'+footnote2+footnote3
|
1683
1774
|
plot_barh(gdf1,colname,titletxt,footnote,axisamp=axisamp)
|
1684
1775
|
else: #使用plotly_express
|
1685
|
-
titletxt="
|
1776
|
+
titletxt="行业板块/指数业绩排名:"+ectranslate(measure)
|
1686
1777
|
footnote=footnote1+'。'+footnote2+footnote3
|
1687
1778
|
plot_barh2(gdf1,colname,titletxt,footnote)
|
1688
1779
|
else:
|
@@ -1749,6 +1840,11 @@ def compare_mindustry_sw(industry_list,measure,start,end, \
|
|
1749
1840
|
|
1750
1841
|
#获取数据
|
1751
1842
|
ddf=get_industry_sw(itype=itype,period=period,industry_list=industry_list)
|
1843
|
+
found=df_have_data(ddf)
|
1844
|
+
if not found=='Found':
|
1845
|
+
print(" #Warning(compare_mindustry_sw): data tentatively unavailable for group",itype)
|
1846
|
+
print(" Data is sometimes unavialble at certain tie points, eg public holidays")
|
1847
|
+
return None
|
1752
1848
|
|
1753
1849
|
#计算指标
|
1754
1850
|
_,idf=calc_industry_sw(ddf,start,end)
|
@@ -1792,13 +1888,13 @@ def compare_mindustry_sw(industry_list,measure,start,end, \
|
|
1792
1888
|
dfs1=dfs[(dfs.index >= istartpd) & (dfs.index <= iendpd)]
|
1793
1889
|
|
1794
1890
|
y_label=measure
|
1795
|
-
title_txt="
|
1891
|
+
title_txt="行业板块/指数分析:市场业绩趋势与评价"
|
1796
1892
|
import datetime; today = datetime.date.today()
|
1797
1893
|
if graph:
|
1798
1894
|
colname=measure
|
1799
1895
|
|
1800
1896
|
import datetime; today=datetime.date.today()
|
1801
|
-
footnote1='\n
|
1897
|
+
footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
|
1802
1898
|
footnote2="数据来源: 申万宏源, "+str(today)+'统计'
|
1803
1899
|
footnote=footnote1+footnote2
|
1804
1900
|
|
@@ -1882,6 +1978,11 @@ def compare_mindustry_sw2(industry_list,measure,start,end, \
|
|
1882
1978
|
|
1883
1979
|
#获取数据
|
1884
1980
|
ddf=get_industry_sw2(industry_list=industry_list,period=period)
|
1981
|
+
found=df_have_data(ddf)
|
1982
|
+
if not found=='Found':
|
1983
|
+
print(" #Warning(compare_mindustry_sw): data tentatively unavailable for",industry_list)
|
1984
|
+
print(" Data is sometimes unavialble at certain tie points, eg public holidays")
|
1985
|
+
return None
|
1885
1986
|
|
1886
1987
|
#计算指标
|
1887
1988
|
_,idf=calc_industry_sw(ddf,start,end)
|
@@ -1925,13 +2026,13 @@ def compare_mindustry_sw2(industry_list,measure,start,end, \
|
|
1925
2026
|
dfs1=dfs[(dfs.index >= istartpd) & (dfs.index <= iendpd)]
|
1926
2027
|
|
1927
2028
|
y_label=measure
|
1928
|
-
title_txt="行业(板块)
|
2029
|
+
title_txt="行业(板块)/指数分析:市场业绩趋势与评价"
|
1929
2030
|
import datetime; today = datetime.date.today()
|
1930
2031
|
if graph:
|
1931
2032
|
colname=measure
|
1932
|
-
title_txt="行业(板块)
|
2033
|
+
title_txt="行业(板块)/指数分析:市场业绩趋势"
|
1933
2034
|
import datetime; today=datetime.date.today()
|
1934
|
-
footnote1='\n
|
2035
|
+
footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
|
1935
2036
|
footnote2="数据来源: 申万宏源, "+str(today)+'统计'
|
1936
2037
|
footnote=footnote1+footnote2
|
1937
2038
|
|
@@ -2065,10 +2166,10 @@ def compare_industry_sw(idfall,industry_list,measure,graph=True):
|
|
2065
2166
|
if graph:
|
2066
2167
|
y_label=measure
|
2067
2168
|
colname=measure
|
2068
|
-
title_txt="
|
2169
|
+
title_txt="行业板块/指数分析:市场业绩趋势"
|
2069
2170
|
|
2070
2171
|
import datetime; today=datetime.date.today()
|
2071
|
-
footnote1='\n
|
2172
|
+
footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
|
2072
2173
|
footnote2="数据来源: 申万宏源, "+str(today)+'统计'
|
2073
2174
|
footnote=footnote1+footnote2
|
2074
2175
|
|
@@ -2124,11 +2225,11 @@ def compare_industry_sw_sharpe(idfall,industries,base_return='Annual Ret%',graph
|
|
2124
2225
|
sdf=atmp[industrylist]
|
2125
2226
|
if graph:
|
2126
2227
|
y_label='夏普比率(基于'+ectranslate(base_return)+')'
|
2127
|
-
title_txt="
|
2228
|
+
title_txt="行业板块/指数分析:市场发展趋势"
|
2128
2229
|
|
2129
2230
|
istart=sdf.index[0].strftime('%Y-%m-%d')
|
2130
2231
|
iend=sdf.index[-1].strftime('%Y-%m-%d')
|
2131
|
-
footnote1='\n
|
2232
|
+
footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
|
2132
2233
|
import datetime; today=datetime.date.today()
|
2133
2234
|
#footnote2="数据来源: 申万宏源, "+str(today)+'统计(未计入无风险利率)'
|
2134
2235
|
footnote2="数据来源: 申万宏源, "+str(today)+'统计'
|
@@ -2188,10 +2289,10 @@ def rank_industry_sw_sharpe(idfall,base_return='Exp Ret%',graph=True,axisamp=0.8
|
|
2188
2289
|
|
2189
2290
|
if graph:
|
2190
2291
|
colname=col
|
2191
|
-
titletxt="
|
2292
|
+
titletxt="行业板块/指数分析:最新业绩排名"
|
2192
2293
|
import datetime; today=datetime.date.today()
|
2193
2294
|
footnote0='夏普比率(基于'+ectranslate(base_return)+') -->\n\n'
|
2194
|
-
footnote1='
|
2295
|
+
footnote1='申万行业/指数分类,'+iend+'快照'
|
2195
2296
|
footnote2='观察期:'+istart+'至'+iend+','
|
2196
2297
|
footnote3="数据来源: 申万宏源, "+str(today)+'统计'
|
2197
2298
|
footnote=footnote0+footnote1+'\n'+footnote2+footnote3
|
@@ -2199,7 +2300,7 @@ def rank_industry_sw_sharpe(idfall,base_return='Exp Ret%',graph=True,axisamp=0.8
|
|
2199
2300
|
footnote=footnote0+footnote1+'\n'+footnote2+footnote3
|
2200
2301
|
plot_barh(dftail3,colname,titletxt,footnote,axisamp=axisamp)
|
2201
2302
|
else: #使用plotly_express
|
2202
|
-
titletxt="
|
2303
|
+
titletxt="行业板块/指数业绩排名:夏普比率(基于"+ectranslate(base_return)+')'
|
2203
2304
|
footnote=footnote1+'。'+footnote2+footnote3
|
2204
2305
|
plot_barh2(dftail3,colname,titletxt,footnote)
|
2205
2306
|
|
@@ -2212,6 +2313,8 @@ if __name__=='__main__':
|
|
2212
2313
|
industry='801193.SW'
|
2213
2314
|
industry='851811.SW'
|
2214
2315
|
industry='801181.SW'
|
2316
|
+
industry='801841.SW'
|
2317
|
+
|
2215
2318
|
top=5
|
2216
2319
|
df=industry_stock_sw(industry)
|
2217
2320
|
|
@@ -2234,10 +2337,14 @@ def industry_stock_sw(industry='801270.SW',top=5,printout=False):
|
|
2234
2337
|
try:
|
2235
2338
|
cdf = ak.index_component_sw(industry)
|
2236
2339
|
except:
|
2237
|
-
print(" #Warning(industry_stock_sw):
|
2238
|
-
print("
|
2340
|
+
print(" #Warning(industry_stock_sw): failed to retrieve component for index",industry)
|
2341
|
+
print(" Try solution: upgrade akshare, restart jupyter and try again")
|
2239
2342
|
return None,None
|
2240
2343
|
|
2344
|
+
#去重,保留最新日期的记录
|
2345
|
+
cdf.sort_values(by=['证券代码','计入日期'],ascending=[True,False],inplace=True)
|
2346
|
+
cdf.drop_duplicates(subset=['证券代码'],keep='first',inplace=True)
|
2347
|
+
|
2241
2348
|
# 删除'证券名称'为None的行
|
2242
2349
|
cdf=cdf.mask(cdf.eq('None')).dropna()
|
2243
2350
|
cdf_total=len(cdf)
|
@@ -2267,9 +2374,9 @@ def industry_stock_sw(industry='801270.SW',top=5,printout=False):
|
|
2267
2374
|
if printout:
|
2268
2375
|
if '.SW' not in industry:
|
2269
2376
|
industry=industry+'.SW'
|
2270
|
-
titletxt="
|
2377
|
+
titletxt="申万指数成分证券:"+industry_sw_name(industry)+'('+industry+')'
|
2271
2378
|
import datetime as dt; todaydt=str(dt.date.today())
|
2272
|
-
footnote="
|
2379
|
+
footnote="成分证券数量:"+str(cdf_total)+",申万宏源,"+str(todaydt)
|
2273
2380
|
|
2274
2381
|
#df_directprint(cdf1,title_txt,footnote)
|
2275
2382
|
df_display_CSS(cdf1,titletxt=titletxt,footnote=footnote,facecolor='papayawhip',decimals=3, \
|
@@ -2332,6 +2439,11 @@ def get_industry_info_sw(start,end,itype='1'):
|
|
2332
2439
|
print("\n*** Step 1:")
|
2333
2440
|
# 获取行业历史数据,本步骤所需时间较长
|
2334
2441
|
df=get_industry_sw(itype=itype)
|
2442
|
+
found=df_have_data(df)
|
2443
|
+
if not found=='Found':
|
2444
|
+
print(" #Warning(compare_mindustry_sw): data tentatively unavailable for group",itype)
|
2445
|
+
print(" Data is sometimes unavialble at certain time points, try again later")
|
2446
|
+
return None
|
2335
2447
|
|
2336
2448
|
print("\n*** Step 2:")
|
2337
2449
|
# 计算基础数据,本步骤所需时间较长
|
@@ -2366,6 +2478,11 @@ def get_industry_info_sw2(industry_list,start,end):
|
|
2366
2478
|
print("\n*** Step 1:")
|
2367
2479
|
# 获取行业历史数据,本步骤所需时间较长
|
2368
2480
|
df=get_industry_sw2(industry_list)
|
2481
|
+
found=df_have_data(df)
|
2482
|
+
if not found=='Found':
|
2483
|
+
print(" #Warning(compare_mindustry_sw): data tentatively unavailable for",industry_list)
|
2484
|
+
print(" Data is sometimes unavialble at certain time points, try again later")
|
2485
|
+
return None
|
2369
2486
|
|
2370
2487
|
print("\n*** Step 2:")
|
2371
2488
|
# 计算基础数据,本步骤所需时间较长
|
@@ -2630,7 +2747,7 @@ def industry_correlation_sw(df,tickers,start,end, \
|
|
2630
2747
|
elif pv< 0.001:
|
2631
2748
|
ax1.text(n+widthx,m+widthy,'***',ha = 'center',color = 'k',fontdict=font_dict)
|
2632
2749
|
|
2633
|
-
plt.title("
|
2750
|
+
plt.title("行业板块/指数"+info_type_cn+"之间的相关性")
|
2634
2751
|
plt.tick_params(labelsize=corr_size)
|
2635
2752
|
|
2636
2753
|
footnote1="\n显著性数值:***非常显著(<0.001),**很显著(<0.01),*显著(<0.05),其余为不显著"
|
@@ -2638,7 +2755,7 @@ def industry_correlation_sw(df,tickers,start,end, \
|
|
2638
2755
|
|
2639
2756
|
footnote3="\n观察期间: "+start+'至'+end
|
2640
2757
|
import datetime as dt; stoday=dt.date.today()
|
2641
|
-
footnote4=";来源:Sina/EM,"+str(stoday)+"
|
2758
|
+
footnote4=";来源:Sina/EM,"+str(stoday)+";基于申万行业/指数分类"
|
2642
2759
|
|
2643
2760
|
fontxlabel={'size':corr_size}
|
2644
2761
|
plt.xlabel(footnote1+footnote2+footnote3+footnote4,fontxlabel)
|
@@ -2842,8 +2959,17 @@ def get_sw_index(ticker,start,end):
|
|
2842
2959
|
try:
|
2843
2960
|
dft = ak.index_hist_sw(symbol=symbol,period="day")
|
2844
2961
|
except:
|
2845
|
-
|
2846
|
-
|
2962
|
+
try:
|
2963
|
+
dft = ak.index_hist_fund_sw(symbol=symbol,period="day")
|
2964
|
+
dft['代码']=symbol
|
2965
|
+
dft['收盘']=dft['收盘指数']
|
2966
|
+
dft['开盘']=dft['收盘指数']
|
2967
|
+
dft['最高']=dft['收盘指数']
|
2968
|
+
dft['最低']=dft['收盘指数']
|
2969
|
+
dft['成交量']=0; dft['成交额']=0
|
2970
|
+
except:
|
2971
|
+
print(" #Error(get_sw_index): failed to retrieve index",symbol)
|
2972
|
+
return None
|
2847
2973
|
|
2848
2974
|
dft['ticker']=dft['代码'].apply(lambda x: x+'.SW')
|
2849
2975
|
|
@@ -2956,6 +3082,8 @@ if __name__ =="__main__":
|
|
2956
3082
|
|
2957
3083
|
|
2958
3084
|
def industry_scan_china(sw_level='F', \
|
3085
|
+
indicator='Exp Adj Ret%', \
|
3086
|
+
base_return='Exp Adj Ret%', \
|
2959
3087
|
start='MRY',end='default', \
|
2960
3088
|
RF=0, \
|
2961
3089
|
printout='smart', \
|
@@ -2965,16 +3093,25 @@ def industry_scan_china(sw_level='F', \
|
|
2965
3093
|
申万行业分类sw_level:F--市场表征(默认),S--投资风格(策略),B--大类风格,C--金创,
|
2966
3094
|
1--一级行业,2--二级行业,3--三级行业
|
2967
3095
|
评估期间start与end:允许MRM/MRQ/MRY(默认)/YTD/L3Y(近三年)/L5Y(近五年)
|
3096
|
+
base_return:计算sharpe和sortino比率使用的收益率类型。
|
3097
|
+
当indicator不是sharpe或sortino比率时,base_return需要与indicator保持一致。
|
2968
3098
|
RF:年化无风险收益率,默认0,可参照一年期国债收益率
|
2969
3099
|
筛选方式printout:smart--收益前10名与后10名(默认),winner--仅限收益为正的行业,
|
2970
3100
|
loser--仅限收益为负的行业,50--收益前50名,-10--收益后10名,all--所有行业
|
2971
3101
|
"""
|
2972
|
-
indicator='Exp Ret%'
|
3102
|
+
#indicator='Exp Ret%'
|
2973
3103
|
|
2974
|
-
print("
|
3104
|
+
#print(" Evaluating industry performance, it may take up to hours ... ...")
|
3105
|
+
|
3106
|
+
#节省获取数据的量和时间
|
3107
|
+
if start=='MRY' and end=='default': #默认参数
|
3108
|
+
if 'Weekly' in indicator or 'Weekly' in base_return:
|
3109
|
+
start='MRM'
|
3110
|
+
if 'Monthly' in indicator or 'Monthly' in base_return:
|
3111
|
+
start='MRQ'
|
2975
3112
|
|
2976
3113
|
# 检查申万行业
|
2977
|
-
sw_level_list=['1','2','3','F','S','B','C']
|
3114
|
+
sw_level_list=['1','2','3','F','S','B','C','J1','J2','J3','JF']
|
2978
3115
|
if sw_level not in sw_level_list:
|
2979
3116
|
print(" #Warning(industry_scan_china): invalid Shenwan industry types for",sw_level)
|
2980
3117
|
print(" Valid Shenwan industry types:",end='')
|
@@ -2982,14 +3119,38 @@ def industry_scan_china(sw_level='F', \
|
|
2982
3119
|
return None
|
2983
3120
|
|
2984
3121
|
# 检查支持的指标
|
2985
|
-
|
2986
|
-
'
|
3122
|
+
base_return_list=['Exp Ret%','Exp Ret Volatility%','Exp Ret LPSD%', \
|
3123
|
+
'Exp Adj Ret%','Exp Adj Ret Volatility%','Exp Adj Ret LPSD%', \
|
3124
|
+
|
3125
|
+
'Annual Ret%','Annual Ret Volatility%','Annual Ret LPSD%', \
|
3126
|
+
'Annual Adj Ret%','Annual Adj Ret Volatility%','Annual Adj Ret LPSD%', \
|
3127
|
+
|
3128
|
+
'Quarterly Ret%','Quarterly Ret Volatility%','Quarterly Ret LPSD%', \
|
3129
|
+
'Quarterly Adj Ret%','Quarterly Adj Ret Volatility%','Quarterly Adj Ret LPSD%', \
|
3130
|
+
|
3131
|
+
'Monthly Ret%','Monthly Ret Volatility%','Monthly Ret LPSD%', \
|
3132
|
+
'Monthly Adj Ret%','Monthly Adj Ret Volatility%','Monthly Adj Ret LPSD%', \
|
3133
|
+
|
3134
|
+
'Weekly Ret%','Weekly Ret Volatility%','Weekly Ret LPSD%', \
|
3135
|
+
'Weekly Adj Ret%','Weekly Adj Ret Volatility%','Weekly Adj Ret LPSD%', \
|
3136
|
+
]
|
3137
|
+
if base_return not in base_return_list:
|
3138
|
+
print(" #Warning(industry_scan_china): unsupported base return type for",base_return)
|
3139
|
+
print(" Supported base return:")
|
3140
|
+
printlist(base_return_list,numperline=5,beforehand=' ',separator=', ')
|
3141
|
+
return None
|
3142
|
+
|
3143
|
+
|
3144
|
+
indicator_list=base_return_list + ['sharpe','sortino']
|
3145
|
+
|
3146
|
+
if indicator.lower() in ['sharpe','sortino']:
|
3147
|
+
indicator=indicator.lower()
|
3148
|
+
|
2987
3149
|
if indicator not in indicator_list:
|
2988
3150
|
print(" #Warning(industry_scan_china): unsupported indicator for",indicator)
|
2989
3151
|
print(" Supported indicators:")
|
2990
3152
|
printlist(indicator_list,numperline=5,beforehand=' ',separator=', ')
|
2991
3153
|
return None
|
2992
|
-
|
2993
3154
|
|
2994
3155
|
# 检查日期:
|
2995
3156
|
fromdate,todate=start_end_preprocess(start,end)
|
@@ -3026,7 +3187,8 @@ def industry_scan_china(sw_level='F', \
|
|
3026
3187
|
fromdate=date_adjust(todate,adjust=-31)
|
3027
3188
|
"""
|
3028
3189
|
# 获取申万行业类别内部标识
|
3029
|
-
itype_list=['1','2','3','F','S','B','C']
|
3190
|
+
#itype_list=['1','2','3','F','S','B','C']
|
3191
|
+
itype_list=sw_level_list
|
3030
3192
|
pos=sw_level_list.index(sw_level)
|
3031
3193
|
itype=itype_list[pos]
|
3032
3194
|
|
@@ -3036,21 +3198,68 @@ def industry_scan_china(sw_level='F', \
|
|
3036
3198
|
|
3037
3199
|
# 循环获取行业指数,简单计算指数增长率,排序
|
3038
3200
|
#print(" Retrieving industry info, which may need up to hours, take a break ...")
|
3039
|
-
print("\n *** Step 1: Retrieving information")
|
3201
|
+
#print("\n *** Step 1: Retrieving industry information")
|
3202
|
+
print(" *** Step 1: ")
|
3040
3203
|
# 获取行业历史数据,本步骤所需时间较长
|
3041
3204
|
df=get_industry_sw(itype=itype)
|
3205
|
+
found=df_have_data(df)
|
3206
|
+
if not found=='Found':
|
3207
|
+
print(" #Warning(compare_mindustry_sw): data tentatively unavailable for group",itype)
|
3208
|
+
print(" Data is sometimes unavialble at certain time points, try again later")
|
3209
|
+
return None
|
3042
3210
|
|
3043
3211
|
# 计算指标
|
3044
|
-
print("\n *** Step 2: Computing indicators")
|
3212
|
+
#print("\n *** Step 2: Computing performance indicators")
|
3213
|
+
print("\n *** Step 2: ")
|
3045
3214
|
# 计算基础数据,本步骤所需时间较长
|
3046
3215
|
idf,idfall=calc_industry_sw(df,fromdate,todate)
|
3047
3216
|
|
3048
|
-
|
3049
|
-
|
3050
|
-
|
3051
|
-
|
3052
|
-
|
3053
|
-
|
3217
|
+
#设置base_return:非['sharpe','sortino']时
|
3218
|
+
if not indicator in ['sharpe','sortino']:
|
3219
|
+
#以下的判断顺序不可轻易改变
|
3220
|
+
if 'Ret Volatility%' in indicator:
|
3221
|
+
base_return=indicator.replace('Ret Volatility%','Ret%')
|
3222
|
+
elif 'Ret Volatility' in indicator:
|
3223
|
+
base_return=indicator.replace('Ret Volatility','Ret')
|
3224
|
+
elif 'Ret LPSD%' in indicator:
|
3225
|
+
base_return=indicator.replace('Ret LPSD%','Ret%')
|
3226
|
+
elif 'Ret LPSD' in indicator:
|
3227
|
+
base_return=indicator.replace('Ret LPSD','Ret')
|
3228
|
+
else:
|
3229
|
+
base_return=indicator
|
3230
|
+
|
3231
|
+
|
3232
|
+
#计算期间内的无风险收益率:RF为小数,而idf中的收益率为百分数
|
3233
|
+
if '%' in base_return:
|
3234
|
+
RFS=RF*100 #百分数
|
3235
|
+
|
3236
|
+
base_return_volatility=base_return.replace('Ret%','Ret Volatility%')
|
3237
|
+
base_return_lpsd=base_return.replace('Ret%','Ret LPSD%')
|
3238
|
+
else:
|
3239
|
+
RFS=RF
|
3240
|
+
|
3241
|
+
base_return_volatility=base_return.replace('Ret','Ret Volatility')
|
3242
|
+
base_return_lpsd=base_return.replace('Ret','Ret LPSD')
|
3243
|
+
|
3244
|
+
if 'Exp' in base_return:
|
3245
|
+
RF_daily=RFS/365
|
3246
|
+
RF_days=RF_daily * calculate_days(fromdate, todate)
|
3247
|
+
|
3248
|
+
elif 'Annual' in base_return:
|
3249
|
+
RF_days=RFS
|
3250
|
+
|
3251
|
+
elif 'Quarterly' in base_return:
|
3252
|
+
RF_days=RFS/4
|
3253
|
+
|
3254
|
+
elif 'Monthly' in base_return:
|
3255
|
+
RF_days=RFS/12
|
3256
|
+
|
3257
|
+
elif 'Weekly' in base_return:
|
3258
|
+
RF_days=RFS/52
|
3259
|
+
|
3260
|
+
idf['sharpe']=(idf[base_return]-RF_days) / idf[base_return_volatility]
|
3261
|
+
idf['sortino']=(idf[base_return]-RF_days) / idf[base_return_lpsd]
|
3262
|
+
|
3054
3263
|
|
3055
3264
|
# 排序
|
3056
3265
|
idf.sort_values(indicator,ascending=False,inplace=True)
|
@@ -3083,9 +3292,9 @@ def industry_scan_china(sw_level='F', \
|
|
3083
3292
|
if printout=='all':
|
3084
3293
|
df_prt=df2
|
3085
3294
|
elif printout=='winner':
|
3086
|
-
df_prt=df2[df2[
|
3295
|
+
df_prt=df2[df2[indicator] > 0]
|
3087
3296
|
elif printout=='loser':
|
3088
|
-
df_prt=df2[df2[
|
3297
|
+
df_prt=df2[df2[indicator] <= 0]
|
3089
3298
|
else:
|
3090
3299
|
try:
|
3091
3300
|
printoutd=int(printout)
|
@@ -3097,10 +3306,10 @@ def industry_scan_china(sw_level='F', \
|
|
3097
3306
|
pass
|
3098
3307
|
|
3099
3308
|
# 标题改中文
|
3100
|
-
df_prt.rename(columns={'Industry Code':'
|
3101
|
-
|
3102
|
-
|
3103
|
-
|
3309
|
+
df_prt.rename(columns={'Industry Code':'代码','Industry Name':'名称', \
|
3310
|
+
base_return:ectranslate(base_return), \
|
3311
|
+
base_return_volatility:ectranslate(base_return_volatility), \
|
3312
|
+
base_return_lpsd:ectranslate(base_return_lpsd), \
|
3104
3313
|
'sharpe':'夏普比率','sortino':'索替诺比率'}, \
|
3105
3314
|
inplace=True)
|
3106
3315
|
|
@@ -3119,13 +3328,21 @@ def industry_scan_china(sw_level='F', \
|
|
3119
3328
|
sw_level_txt='申万二级行业'
|
3120
3329
|
elif sw_level=='3':
|
3121
3330
|
sw_level_txt='申万三级行业'
|
3331
|
+
elif sw_level=='J1':
|
3332
|
+
sw_level_txt='申万基金基础一级指数'
|
3333
|
+
elif sw_level=='J2':
|
3334
|
+
sw_level_txt='申万基金基础二级指数'
|
3335
|
+
elif sw_level=='J3':
|
3336
|
+
sw_level_txt='申万基金基础三级指数'
|
3337
|
+
elif sw_level=='JF':
|
3338
|
+
sw_level_txt='申万基金特色指数'
|
3122
3339
|
else:
|
3123
3340
|
sw_level_txt='未知类别'
|
3124
3341
|
|
3125
3342
|
if printout=='all':
|
3126
3343
|
printout_txt='所有指数'
|
3127
3344
|
elif printout=='smart':
|
3128
|
-
printout_txt='
|
3345
|
+
printout_txt='前/后十个行业'
|
3129
3346
|
if len(df2) <=20:
|
3130
3347
|
printout_txt='所有指数'
|
3131
3348
|
elif printout=='winner':
|
@@ -3137,16 +3354,17 @@ def industry_scan_china(sw_level='F', \
|
|
3137
3354
|
num=int(printout)
|
3138
3355
|
if len(df2) > abs(num):
|
3139
3356
|
if num > 0:
|
3140
|
-
printout_txt='
|
3357
|
+
printout_txt='收益排名前'+printout+"名"
|
3141
3358
|
else:
|
3142
|
-
printout_txt='
|
3359
|
+
printout_txt='收益排名后'+str(abs(num))+"名"
|
3143
3360
|
else:
|
3144
3361
|
printout_txt='所有指数'
|
3145
3362
|
except:
|
3146
3363
|
printout_txt='未知筛选方式'
|
3147
3364
|
|
3148
3365
|
#titletxt="申万行业业绩排行榜:"+sw_level_txt+',共'+str(len(df_prt))+"个指数符合条件"
|
3149
|
-
titletxt="行业业绩排行榜:"+sw_level_txt+',筛选方式:'+printout_txt
|
3366
|
+
#titletxt="行业业绩排行榜:"+sw_level_txt+','+ectranslate(indicator)+',筛选方式:'+printout_txt
|
3367
|
+
titletxt="申万宏源行业/指数业绩龙虎榜:"+sw_level_txt+','+printout_txt
|
3150
3368
|
#print("\n***",titletxt,'\n')
|
3151
3369
|
"""
|
3152
3370
|
alignlist=['center']+['left']*(len(list(df_prt))-1)
|
@@ -3154,18 +3372,39 @@ def industry_scan_china(sw_level='F', \
|
|
3154
3372
|
"""
|
3155
3373
|
#print("\n *** 数据来源:综合申万宏源/东方财富/新浪财经,",todaydt,"\b;分析期间:",fromdate+'至'+todate)
|
3156
3374
|
#footnote1="筛选方式:all-所有,smart-收益最高最低各10个,winner-收益为正,loser-收益为负"
|
3157
|
-
|
3158
|
-
|
3159
|
-
footnote=footnote2
|
3375
|
+
footnote1="注:夏普/索梯诺比率基于"+ectranslate(base_return)+",年化无风险利率"+str(round(RF*100,4))+'%'
|
3376
|
+
footnote2="评估期间:"+str(fromdate)+'至'+str(todate)+",数据来源:申万宏源,"+str(todaydt)+"制表"
|
3377
|
+
footnote=footnote1+'\n'+footnote2
|
3378
|
+
#footnote=footnote2
|
3160
3379
|
|
3161
3380
|
#确定表格字体大小
|
3162
3381
|
titile_font_size=font_size
|
3163
3382
|
heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'
|
3164
3383
|
|
3384
|
+
df_prt['序号']=df_prt.index
|
3385
|
+
if indicator=='sharpe':
|
3386
|
+
df_prt=df_prt[['序号','名称','代码','夏普比率','索替诺比率', \
|
3387
|
+
ectranslate(base_return),ectranslate(base_return_volatility),ectranslate(base_return_lpsd)]]
|
3388
|
+
elif indicator=='sortino':
|
3389
|
+
df_prt=df_prt[['序号','名称','代码','索替诺比率','夏普比率', \
|
3390
|
+
ectranslate(base_return),ectranslate(base_return_volatility),ectranslate(base_return_lpsd)]]
|
3391
|
+
|
3392
|
+
elif 'Volatility' in indicator:
|
3393
|
+
df_prt=df_prt[['序号','名称','代码',ectranslate(base_return_volatility),ectranslate(base_return_lpsd), \
|
3394
|
+
ectranslate(base_return),'夏普比率','索替诺比率']]
|
3395
|
+
elif 'LPSD' in indicator:
|
3396
|
+
df_prt=df_prt[['序号','名称','代码',ectranslate(base_return_lpsd),ectranslate(base_return_volatility), \
|
3397
|
+
ectranslate(base_return),'夏普比率','索替诺比率']]
|
3398
|
+
else:
|
3399
|
+
df_prt=df_prt[['序号','名称','代码',ectranslate(base_return), \
|
3400
|
+
ectranslate(base_return_volatility),ectranslate(base_return_lpsd),'夏普比率','索替诺比率']]
|
3401
|
+
|
3402
|
+
#显示表格
|
3165
3403
|
df_display_CSS(df_prt,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
|
3166
|
-
first_col_align='
|
3404
|
+
first_col_align='center',second_col_align='left', \
|
3405
|
+
last_col_align='center',other_col_align='center', \
|
3167
3406
|
titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
|
3168
|
-
|
3407
|
+
data_font_size=data_font_size)
|
3169
3408
|
|
3170
3409
|
return df2
|
3171
3410
|
|
@@ -3193,12 +3432,12 @@ if __name__=='__main__':
|
|
3193
3432
|
|
3194
3433
|
find_industry_sw(ticker)
|
3195
3434
|
|
3196
|
-
def find_industry_sw(ticker,level='1',ticker_order=True,max_sleep=
|
3435
|
+
def find_industry_sw(ticker,level='1',ticker_order=True,max_sleep=30):
|
3197
3436
|
"""
|
3198
3437
|
功能:寻找一只或一组股票所属的申万行业,支持股票代码和股票名称。
|
3199
3438
|
level='1':默认只查找申万1级行业,以便节省时间
|
3200
3439
|
ticker_order=True:默认输出结果按照ticker中的顺序,而非按照所属行业排序
|
3201
|
-
max_sleep
|
3440
|
+
max_sleep:为防止反爬虫,默认每次爬虫后睡眠最多几秒钟
|
3202
3441
|
"""
|
3203
3442
|
print(" Searching shenwan industries for securities ... ...")
|
3204
3443
|
|
@@ -3653,7 +3892,7 @@ def get_stock_industry_sw(ticker):
|
|
3653
3892
|
|
3654
3893
|
return industry
|
3655
3894
|
else:
|
3656
|
-
return
|
3895
|
+
return ''
|
3657
3896
|
|
3658
3897
|
#==============================================================================
|
3659
3898
|
if __name__ == '__main__':
|
@@ -3670,28 +3909,87 @@ def stock_peers_sw(ticker):
|
|
3670
3909
|
try:
|
3671
3910
|
hangye=get_stock_industry_sw(ticker)
|
3672
3911
|
except:
|
3673
|
-
print(" #Warning(stock_peers_sw):
|
3912
|
+
print(" #Warning(stock_peers_sw): industry info not found for",ticker)
|
3674
3913
|
return
|
3675
3914
|
|
3915
|
+
if hangye=='':
|
3916
|
+
print(" #Warning(stock_peers_sw): found empty industry for",ticker)
|
3917
|
+
return
|
3918
|
+
|
3919
|
+
ilist=[]; hangye_final=''
|
3676
3920
|
#三级行业优先
|
3677
3921
|
hangye3=hangye+'Ⅲ'
|
3678
3922
|
try:
|
3679
|
-
print_industry_component_sw(iname=hangye3)
|
3923
|
+
ilist=print_industry_component_sw(iname=hangye3,return_result=True)
|
3924
|
+
hangye_final=hangye3
|
3680
3925
|
except:
|
3681
3926
|
#二级行业次优先
|
3682
3927
|
hangye2=hangye+'Ⅱ'
|
3683
3928
|
try:
|
3684
|
-
print_industry_component_sw(iname=hangye2)
|
3929
|
+
ilist=print_industry_component_sw(iname=hangye2,return_result=True)
|
3930
|
+
hangye_final=hangye2
|
3685
3931
|
except:
|
3686
3932
|
try:
|
3687
|
-
print_industry_component_sw(iname=hangye)
|
3933
|
+
ilist=print_industry_component_sw(iname=hangye,return_result=True)
|
3934
|
+
hangye_final=hangye
|
3688
3935
|
except:
|
3689
3936
|
print("\n #Warning(stock_peers_sw): failed to search peers for",ticker)
|
3690
3937
|
print(" Possible solutions:")
|
3691
|
-
print("
|
3692
|
-
print("
|
3693
|
-
|
3694
|
-
|
3938
|
+
print(" Try first: upgrade akshare, restart Jupyter and try again")
|
3939
|
+
print(" If not working, uninstall anaconda and reinstall a newer version")
|
3940
|
+
|
3941
|
+
#查找股票在行业板块中的位置
|
3942
|
+
if not ilist=='':
|
3943
|
+
ticker6=ticker[:6]
|
3944
|
+
for i in ilist:
|
3945
|
+
if ticker6 in i:
|
3946
|
+
ticker_item=i
|
3947
|
+
ticker_pos=ilist.index(i)+1
|
3948
|
+
break
|
3949
|
+
|
3950
|
+
footnote0="注:"
|
3951
|
+
footnote1=ticker_item+"在申万行业"+hangye_final+"指数中的权重排名为"+str(ticker_pos)+'/'+str(len(ilist))
|
3952
|
+
footnote2="该指数的权重排名依据主要包括公司的市值规模、流动性以及市场代表性"
|
3953
|
+
footnote=footnote0+'\n'+footnote1+'\n'+footnote2
|
3954
|
+
print(footnote)
|
3695
3955
|
return
|
3696
3956
|
|
3957
|
+
#==============================================================================
|
3958
|
+
if __name__ == '__main__':
|
3959
|
+
sw_index=['绩优股指数','大盘指数','中市盈率指数','高市净率指数',]
|
3960
|
+
sw_index=['大类风格-先进制造','大类风格--医药医疗']
|
3961
|
+
|
3962
|
+
index_intersection_sw(sw_index)
|
3963
|
+
|
3964
|
+
def index_intersection_sw(sw_index=[]):
|
3965
|
+
#寻找多个申万指数中共同的成分股
|
3966
|
+
if len(sw_index)==0:
|
3967
|
+
print(" #Warning(stock_intersection_sw): no index found for intersection")
|
3968
|
+
return
|
3969
|
+
|
3970
|
+
if isinstance(sw_index,str):
|
3971
|
+
sw_index=[sw_index]
|
3972
|
+
|
3973
|
+
result_list=[]
|
3974
|
+
for i in sw_index:
|
3975
|
+
try:
|
3976
|
+
ilist=print_industry_component_sw(i,printout=False,return_result=True)
|
3977
|
+
except:
|
3978
|
+
print(" #Warning(stock_intersection_sw): failed to find component for index",i)
|
3979
|
+
continue
|
3980
|
+
|
3981
|
+
if len(result_list)==0:
|
3982
|
+
result_list=[ilist]
|
3983
|
+
else:
|
3984
|
+
result_list=result_list+[ilist]
|
3985
|
+
|
3986
|
+
list_intersection(result_list)
|
3987
|
+
|
3988
|
+
return
|
3989
|
+
#==============================================================================
|
3990
|
+
#==============================================================================
|
3991
|
+
#==============================================================================
|
3992
|
+
#==============================================================================
|
3993
|
+
#==============================================================================
|
3994
|
+
#==============================================================================
|
3697
3995
|
|