siat 3.10.132__py3-none-any.whl → 3.10.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. siat/__init__.py +0 -0
  2. siat/allin.py +0 -0
  3. siat/assets_liquidity.py +0 -0
  4. siat/beta_adjustment.py +0 -0
  5. siat/beta_adjustment_china.py +0 -0
  6. siat/blockchain.py +0 -0
  7. siat/bond.py +0 -0
  8. siat/bond_base.py +0 -0
  9. siat/bond_china.py +0 -0
  10. siat/bond_zh_sina.py +0 -0
  11. siat/capm_beta.py +0 -0
  12. siat/capm_beta2.py +0 -0
  13. siat/compare_cross.py +0 -0
  14. siat/copyrights.py +0 -0
  15. siat/cryptocurrency.py +0 -0
  16. siat/economy.py +0 -0
  17. siat/economy2.py +0 -0
  18. siat/esg.py +0 -0
  19. siat/event_study.py +0 -0
  20. siat/exchange_bond_china.pickle +0 -0
  21. siat/fama_french.py +0 -0
  22. siat/fin_stmt2_yahoo.py +0 -0
  23. siat/financial_base.py +0 -0
  24. siat/financial_statements.py +0 -0
  25. siat/financials.py +0 -0
  26. siat/financials2.py +0 -0
  27. siat/financials_china.py +0 -0
  28. siat/financials_china2.py +0 -0
  29. siat/fund.py +0 -0
  30. siat/fund_china.pickle +0 -0
  31. siat/fund_china.py +0 -0
  32. siat/future_china.py +0 -0
  33. siat/google_authenticator.py +0 -0
  34. siat/grafix.py +0 -0
  35. siat/holding_risk.py +0 -0
  36. siat/luchy_draw.py +0 -0
  37. siat/market_china.py +0 -0
  38. siat/markowitz.py +0 -0
  39. siat/markowitz2.py +0 -0
  40. siat/markowitz2_20250704.py +0 -0
  41. siat/markowitz2_20250705.py +0 -0
  42. siat/markowitz_simple.py +0 -0
  43. siat/ml_cases.py +0 -0
  44. siat/ml_cases_example.py +0 -0
  45. siat/option_china.py +0 -0
  46. siat/option_pricing.py +0 -0
  47. siat/other_indexes.py +0 -0
  48. siat/risk_adjusted_return.py +0 -0
  49. siat/risk_adjusted_return2.py +0 -0
  50. siat/risk_evaluation.py +0 -0
  51. siat/risk_free_rate.py +0 -0
  52. siat/sector_china.py +0 -0
  53. siat/security_price2.py +0 -0
  54. siat/security_prices.py +40 -2
  55. siat/security_trend.py +0 -0
  56. siat/security_trend2.py +0 -0
  57. siat/stock.py +0 -0
  58. siat/stock_advice_linear.py +0 -0
  59. siat/stock_base.py +0 -0
  60. siat/stock_china.py +0 -0
  61. siat/stock_info.pickle +0 -0
  62. siat/stock_prices_kneighbors.py +0 -0
  63. siat/stock_prices_linear.py +0 -0
  64. siat/stock_profile.py +0 -0
  65. siat/stock_technical.py +0 -0
  66. siat/stooq.py +0 -0
  67. siat/transaction.py +0 -0
  68. siat/translate.py +0 -0
  69. siat/valuation.py +0 -0
  70. siat/valuation_china.py +0 -0
  71. siat/var_model_validation.py +0 -0
  72. siat/yf_name.py +0 -0
  73. {siat-3.10.132.dist-info/licenses → siat-3.10.133.dist-info}/LICENSE +0 -0
  74. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/METADATA +232 -235
  75. siat-3.10.133.dist-info/RECORD +78 -0
  76. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/WHEEL +1 -1
  77. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/top_level.txt +0 -1
  78. build/lib/build/lib/siat/__init__.py +0 -75
  79. build/lib/build/lib/siat/allin.py +0 -137
  80. build/lib/build/lib/siat/assets_liquidity.py +0 -915
  81. build/lib/build/lib/siat/beta_adjustment.py +0 -1058
  82. build/lib/build/lib/siat/beta_adjustment_china.py +0 -548
  83. build/lib/build/lib/siat/blockchain.py +0 -143
  84. build/lib/build/lib/siat/bond.py +0 -2900
  85. build/lib/build/lib/siat/bond_base.py +0 -992
  86. build/lib/build/lib/siat/bond_china.py +0 -100
  87. build/lib/build/lib/siat/bond_zh_sina.py +0 -143
  88. build/lib/build/lib/siat/capm_beta.py +0 -783
  89. build/lib/build/lib/siat/capm_beta2.py +0 -887
  90. build/lib/build/lib/siat/common.py +0 -5360
  91. build/lib/build/lib/siat/compare_cross.py +0 -642
  92. build/lib/build/lib/siat/copyrights.py +0 -18
  93. build/lib/build/lib/siat/cryptocurrency.py +0 -667
  94. build/lib/build/lib/siat/economy.py +0 -1471
  95. build/lib/build/lib/siat/economy2.py +0 -1853
  96. build/lib/build/lib/siat/esg.py +0 -536
  97. build/lib/build/lib/siat/event_study.py +0 -815
  98. build/lib/build/lib/siat/fama_french.py +0 -1521
  99. build/lib/build/lib/siat/fin_stmt2_yahoo.py +0 -982
  100. build/lib/build/lib/siat/financial_base.py +0 -1160
  101. build/lib/build/lib/siat/financial_statements.py +0 -598
  102. build/lib/build/lib/siat/financials.py +0 -2339
  103. build/lib/build/lib/siat/financials2.py +0 -1278
  104. build/lib/build/lib/siat/financials_china.py +0 -4433
  105. build/lib/build/lib/siat/financials_china2.py +0 -2212
  106. build/lib/build/lib/siat/fund.py +0 -629
  107. build/lib/build/lib/siat/fund_china.py +0 -3307
  108. build/lib/build/lib/siat/future_china.py +0 -551
  109. build/lib/build/lib/siat/google_authenticator.py +0 -47
  110. build/lib/build/lib/siat/grafix.py +0 -3636
  111. build/lib/build/lib/siat/holding_risk.py +0 -867
  112. build/lib/build/lib/siat/luchy_draw.py +0 -638
  113. build/lib/build/lib/siat/market_china.py +0 -1168
  114. build/lib/build/lib/siat/markowitz.py +0 -2363
  115. build/lib/build/lib/siat/markowitz2.py +0 -3150
  116. build/lib/build/lib/siat/markowitz2_20250704.py +0 -2969
  117. build/lib/build/lib/siat/markowitz2_20250705.py +0 -3158
  118. build/lib/build/lib/siat/markowitz_simple.py +0 -373
  119. build/lib/build/lib/siat/ml_cases.py +0 -2291
  120. build/lib/build/lib/siat/ml_cases_example.py +0 -60
  121. build/lib/build/lib/siat/option_china.py +0 -3069
  122. build/lib/build/lib/siat/option_pricing.py +0 -1925
  123. build/lib/build/lib/siat/other_indexes.py +0 -409
  124. build/lib/build/lib/siat/risk_adjusted_return.py +0 -1576
  125. build/lib/build/lib/siat/risk_adjusted_return2.py +0 -1900
  126. build/lib/build/lib/siat/risk_evaluation.py +0 -2218
  127. build/lib/build/lib/siat/risk_free_rate.py +0 -351
  128. build/lib/build/lib/siat/sector_china.py +0 -4140
  129. build/lib/build/lib/siat/security_price2.py +0 -727
  130. build/lib/build/lib/siat/security_prices.py +0 -3408
  131. build/lib/build/lib/siat/security_trend.py +0 -402
  132. build/lib/build/lib/siat/security_trend2.py +0 -646
  133. build/lib/build/lib/siat/stock.py +0 -4284
  134. build/lib/build/lib/siat/stock_advice_linear.py +0 -934
  135. build/lib/build/lib/siat/stock_base.py +0 -26
  136. build/lib/build/lib/siat/stock_china.py +0 -2095
  137. build/lib/build/lib/siat/stock_prices_kneighbors.py +0 -910
  138. build/lib/build/lib/siat/stock_prices_linear.py +0 -386
  139. build/lib/build/lib/siat/stock_profile.py +0 -707
  140. build/lib/build/lib/siat/stock_technical.py +0 -3305
  141. build/lib/build/lib/siat/stooq.py +0 -74
  142. build/lib/build/lib/siat/transaction.py +0 -347
  143. build/lib/build/lib/siat/translate.py +0 -5183
  144. build/lib/build/lib/siat/valuation.py +0 -1378
  145. build/lib/build/lib/siat/valuation_china.py +0 -2076
  146. build/lib/build/lib/siat/var_model_validation.py +0 -444
  147. build/lib/build/lib/siat/yf_name.py +0 -811
  148. build/lib/siat/__init__.py +0 -75
  149. build/lib/siat/allin.py +0 -137
  150. build/lib/siat/assets_liquidity.py +0 -915
  151. build/lib/siat/beta_adjustment.py +0 -1058
  152. build/lib/siat/beta_adjustment_china.py +0 -548
  153. build/lib/siat/blockchain.py +0 -143
  154. build/lib/siat/bond.py +0 -2900
  155. build/lib/siat/bond_base.py +0 -992
  156. build/lib/siat/bond_china.py +0 -100
  157. build/lib/siat/bond_zh_sina.py +0 -143
  158. build/lib/siat/capm_beta.py +0 -783
  159. build/lib/siat/capm_beta2.py +0 -887
  160. build/lib/siat/common.py +0 -5360
  161. build/lib/siat/compare_cross.py +0 -642
  162. build/lib/siat/copyrights.py +0 -18
  163. build/lib/siat/cryptocurrency.py +0 -667
  164. build/lib/siat/economy.py +0 -1471
  165. build/lib/siat/economy2.py +0 -1853
  166. build/lib/siat/esg.py +0 -536
  167. build/lib/siat/event_study.py +0 -815
  168. build/lib/siat/fama_french.py +0 -1521
  169. build/lib/siat/fin_stmt2_yahoo.py +0 -982
  170. build/lib/siat/financial_base.py +0 -1160
  171. build/lib/siat/financial_statements.py +0 -598
  172. build/lib/siat/financials.py +0 -2339
  173. build/lib/siat/financials2.py +0 -1278
  174. build/lib/siat/financials_china.py +0 -4433
  175. build/lib/siat/financials_china2.py +0 -2212
  176. build/lib/siat/fund.py +0 -629
  177. build/lib/siat/fund_china.py +0 -3307
  178. build/lib/siat/future_china.py +0 -551
  179. build/lib/siat/google_authenticator.py +0 -47
  180. build/lib/siat/grafix.py +0 -3636
  181. build/lib/siat/holding_risk.py +0 -867
  182. build/lib/siat/luchy_draw.py +0 -638
  183. build/lib/siat/market_china.py +0 -1168
  184. build/lib/siat/markowitz.py +0 -2363
  185. build/lib/siat/markowitz2.py +0 -3150
  186. build/lib/siat/markowitz2_20250704.py +0 -2969
  187. build/lib/siat/markowitz2_20250705.py +0 -3158
  188. build/lib/siat/markowitz_simple.py +0 -373
  189. build/lib/siat/ml_cases.py +0 -2291
  190. build/lib/siat/ml_cases_example.py +0 -60
  191. build/lib/siat/option_china.py +0 -3069
  192. build/lib/siat/option_pricing.py +0 -1925
  193. build/lib/siat/other_indexes.py +0 -409
  194. build/lib/siat/risk_adjusted_return.py +0 -1576
  195. build/lib/siat/risk_adjusted_return2.py +0 -1900
  196. build/lib/siat/risk_evaluation.py +0 -2218
  197. build/lib/siat/risk_free_rate.py +0 -351
  198. build/lib/siat/sector_china.py +0 -4140
  199. build/lib/siat/security_price2.py +0 -727
  200. build/lib/siat/security_prices.py +0 -3408
  201. build/lib/siat/security_trend.py +0 -402
  202. build/lib/siat/security_trend2.py +0 -646
  203. build/lib/siat/stock.py +0 -4284
  204. build/lib/siat/stock_advice_linear.py +0 -934
  205. build/lib/siat/stock_base.py +0 -26
  206. build/lib/siat/stock_china.py +0 -2095
  207. build/lib/siat/stock_prices_kneighbors.py +0 -910
  208. build/lib/siat/stock_prices_linear.py +0 -386
  209. build/lib/siat/stock_profile.py +0 -707
  210. build/lib/siat/stock_technical.py +0 -3305
  211. build/lib/siat/stooq.py +0 -74
  212. build/lib/siat/transaction.py +0 -347
  213. build/lib/siat/translate.py +0 -5183
  214. build/lib/siat/valuation.py +0 -1378
  215. build/lib/siat/valuation_china.py +0 -2076
  216. build/lib/siat/var_model_validation.py +0 -444
  217. build/lib/siat/yf_name.py +0 -811
  218. siat-3.10.132.dist-info/RECORD +0 -218
@@ -1,4140 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- 本模块功能:中国行业板块市场分析
4
- 所属工具包:证券投资分析工具SIAT
5
- SIAT:Security Investment Analysis Tool
6
- 创建日期:2020年10月20日
7
- 最新修订日期:2020年10月21日
8
- 作者:王德宏 (WANG Dehong, Peter)
9
- 作者单位:北京外国语大学国际商学院
10
- 版权所有:王德宏
11
- 用途限制:仅限研究与教学使用,不可商用!商用需要额外授权。
12
- 特别声明:作者不对使用本工具进行证券投资导致的任何损益负责!
13
- """
14
-
15
- #==============================================================================
16
- #关闭所有警告
17
- import warnings; warnings.filterwarnings('ignore')
18
- from siat.common import *
19
- from siat.translate import *
20
- from siat.bond_base import *
21
- from siat.stock import *
22
- from siat.risk_adjusted_return import *
23
- from siat.financials_china2 import *
24
- #==============================================================================
25
-
26
if __name__ == '__main__':
    # Scratch input for interactive runs. Earlier candidate values
    # ("新浪行业", "启明星行业", "地域") were overwritten, so only the
    # final one is kept.
    indicator = "行业"
31
-
32
def _print_sector_names(names, linemaxlen, last_suffix):
    """Print names separated by ', ', wrapping lines; the last name gets last_suffix."""
    total = len(names)
    linelen = 0
    for pos, name in enumerate(names, start=1):
        namelen = strlen(name)
        # Start a new output line once the accumulated width would overflow.
        if (linelen + namelen) > linemaxlen:
            print(' ')
            linelen = 0
        if pos < total:
            print(name, end=', ')
        else:
            print(name + last_suffix)
            break
        linelen = linelen + namelen


def sector_list_china(indicator="新浪行业"):
    """
    Print the sector/industry names of the Chinese stock market.

    Parameters:
        indicator: sectoring method, one of "新浪行业", "启明星行业", "概念",
            "地域", "行业".

    Returns:
        The DataFrame returned by akshare's stock_sector_spot with the '板块'
        and 'label' columns stripped of surrounding whitespace (plus a
        'csrc_type' column when indicator is "行业"), or None when the
        sectoring method is unsupported or the data source fails.

    Source page (per the original author):
        http://finance.sina.com.cn/stock/sl/#qmxindustry_1
    """
    # Reject unsupported sectoring methods before touching the network.
    indicatorlist = ["新浪行业", "概念", "地域", "行业", "启明星行业"]
    if indicator not in indicatorlist:
        print(" #Error(sector_list_china): unsupported sectoring method", indicator)
        print(" Supported sectoring methods:", indicatorlist)
        return None

    import akshare as ak
    try:
        df = ak.stock_sector_spot(indicator=indicator)

        # Strip blanks, otherwise name matching fails easily.
        df['板块'] = df['板块'].apply(lambda x: x.strip())
        df['label'] = df['label'].apply(lambda x: x.strip())
    except Exception:
        print(" #Error(sector_list_china): data source unavailable for", indicator)
        print(" Possible reason 1: data source is self-updating now.")
        print(" Possible reason 2: need to upgrade akshare.")
        print(" Possible reason 3: data source not reachable under vpn.")
        return None

    # De-duplicate, then sort by pinyin.
    sectorlist = sort_pinyin(list(set(df['板块'])))

    # Work around identical pinyin (陕西省 vs 山西省): after sorting, one of the
    # two names may appear twice in a row. The bounds checks prevent an
    # IndexError when the name is the last element.
    if '陕西省' in sectorlist:
        pos = sectorlist.index('陕西省')
        if pos + 1 < len(sectorlist) and sectorlist[pos + 1] == '陕西省':
            sectorlist[pos] = '山西省'
    if '山西省' in sectorlist:
        pos = sectorlist.index('山西省')
        if pos + 1 < len(sectorlist) and sectorlist[pos + 1] == '山西省':
            sectorlist[pos + 1] = '陕西省'
    listnum = len(sectorlist)

    method = indicator if indicator != "行业" else "证监会门类/大类"
    print("\n===== 中国股票市场的行业/板块:", listnum, "\b个(按" + method + "划分) =====\n")

    if indicator in ["新浪行业", "启明星行业", "概念"]:
        # Widest sector name, used to pad every name to the same width.
        maxlen = max((strlen(s) for s in sectorlist), default=0)
        # Names per output row; at least one so the modulo below is safe.
        rownum = max(1, int(80 / (maxlen + 2)))

        for pos, name in enumerate(sectorlist, start=1):
            padded = name + " " * (maxlen - strlen(name))
            print(padded, end=' ')
            if (pos % rownum == 0) or (pos == listnum):
                print(' ')

    if indicator in ["地域"]:
        _print_sector_names(sectorlist, 60, "。")

    # CSRC industry classification: group by the character at label[8:9].
    if indicator in ["行业"]:
        df['csrc_type'] = df['label'].apply(lambda x: x[8:9])
        for csrc_type in sorted(set(df['csrc_type'])):
            names = list(df[df['csrc_type'] == csrc_type]['板块'])
            print(csrc_type, end=': ')
            _print_sector_names(names, 80, " ")

    import datetime
    today = datetime.date.today()
    print("\n*** 信息来源:新浪财经,", today)

    return df
151
-
152
-
153
- #==============================================================================
154
if __name__ == '__main__':
    # Scratch inputs for interactive runs. Earlier candidate values were
    # overwritten, so only the final ones are kept.
    sector_name = "华为"
    indicator = "行业"

    # The original called sector_code_china(sector_name) here, before the
    # function is defined further down the module; running the file as a
    # script raised NameError at this point. The call is dropped: the
    # __main__ block that follows the definition already exercises it.
166
-
167
def sector_code_sina(sector_name):
    """
    Thin wrapper around sector_code_china.

    Prints the sector codes matching sector_name and returns None.
    """
    sector_code_china(sector_name)
173
-
174
def sector_code_china(sector_name):
    """
    Look up and print the sector codes whose sector name contains sector_name.

    Every sectoring method supported by akshare's stock_sector_spot is
    searched in turn; a substring match on the '板块' column is used.

    Parameters:
        sector_name: (part of) a sector/industry name.

    Returns:
        None. Results are printed.
    """
    import akshare as ak
    print("\n===== 查询行业/板块代码 =====")

    indicatorlist = ["新浪行业", "概念", "地域", "启明星行业", "行业"]
    # Classification names shown to the user where they differ from the query key.
    display_names = {"行业": "证监会行业", "概念": "新浪概念"}

    found = 0
    for i in indicatorlist:
        dfi = ak.stock_sector_spot(indicator=i)

        # Strip blanks, otherwise name matching fails easily.
        dfi['板块'] = dfi['板块'].apply(lambda x: x.strip())
        dfi['label'] = dfi['label'].apply(lambda x: x.strip())

        try:
            mask = dfi['板块'].apply(lambda x: sector_name in x).astype(bool)
            matched = dfi[mask]
            found = found + len(matched)

            if len(matched) > 0:
                print("行业/板块名称:", end='')
                print_list(list(matched['板块']), leading_blanks=1)

                print("行业/板块代码:", end='')
                print_list(list(matched['label']), leading_blanks=1, end='')
                print("(" + display_names.get(i, i) + "分类)\n")
        except Exception:
            # Best effort, as in the original: a failure for one sectoring
            # method must not stop the search in the remaining ones.
            continue

    # No sector matched under any sectoring method.
    if found == 0:
        print("*** Sorry, no sector name found for", sector_name)

    return
234
-
235
if __name__ == '__main__':
    # Manual checks of sector_code_china; it prints its findings and
    # returns None, so df ends up as None.
    sector_name = "房地产"
    df = sector_code_china(sector_name)
    df = sector_code_china("医药生物")
    df = sector_code_china("资本市场服务")
240
-
241
- #==============================================================================
242
if __name__ == '__main__':
    # Scratch inputs for interactive runs. Earlier candidate values were
    # overwritten, so only the final ones are kept.
    comp = "公司家数"
    indicator = "行业"
    num = 10
255
-
256
def sector_rank_sina(indicator="涨跌幅",category="新浪行业",rank=5):
    """
    Thin wrapper around sector_rank_china.

    Parameters:
        indicator: measurement to rank by (passed on as `indicator`).
        category: sectoring method (passed on as `ticker`).
        rank: number of rows to show (passed on as `rank`).

    Returns:
        Whatever sector_rank_china returns.
    """
    # sector_rank_china takes (ticker, indicator, rank); the former keyword
    # names comp=/num= no longer exist there and raised TypeError.
    df = sector_rank_china(ticker=category, indicator=indicator, rank=rank)
    return df
262
-
263
- #def sector_rank_china(comp="涨跌幅",indicator="新浪行业",num=10):
264
def sector_rank_china(ticker="新浪行业",indicator="涨跌幅",rank=10):
    """
    Rank the sectors of one sectoring method by a measurement, descending.

    Parameters:
        ticker: sectoring method, one of "新浪行业", "启明星行业", "概念",
            "地域", "行业".
        indicator: measurement, one of "涨跌幅", "平均价格", "公司家数".
        rank: a positive value shows the top rows; a negative value shows
            the bottom rows.

    Returns:
        The full ranked DataFrame (not only the displayed rows), or None when
        an argument is unsupported, the data source fails, or no rows remain.

    Note (from the original author): the company-count field is capped at
    100 by the data source.
    """
    comp = indicator
    category = ticker
    num = rank

    # Validate the arguments before touching the network.
    complist = ["涨跌幅", "平均价格", "公司家数"]
    if comp not in complist:
        print(" #Warning(sector_rank_china): unsupported measurement", comp)
        print(" Supported measurements:", complist)
        return None

    indicatorlist = ["新浪行业", "概念", "地域", "启明星行业", "行业"]
    if category not in indicatorlist:
        print(" #Warning(sector_rank_china): unsupported sectoring method", category)
        print(" Supported sectoring method:", indicatorlist)
        return None

    import akshare as ak
    try:
        df = ak.stock_sector_spot(indicator=category)

        # Strip blanks, otherwise name matching fails easily.
        df['板块'] = df['板块'].apply(lambda x: x.strip())
        df['label'] = df['label'].apply(lambda x: x.strip())
    except Exception:
        print(" #Warning(sector_rank_china): data source tentatively unavailable for", category)
        print(" Possible reason: data source is self-updating.")
        print(" Solution: have a breath of fresh air and try later.")
        return None

    df.dropna(inplace=True)
    # The source has duplicated column names, so force unique ones.
    df.columns = ['label', '板块', '公司家数', '平均价格', '涨跌额', '涨跌幅',
                  '总成交量(手)', '总成交额(万元)', '个股代码', '个股涨跌幅', '个股股价',
                  '个股涨跌额', '个股名称']
    df['均价'] = round(df['平均价格'].astype('float'), 2)
    df['涨跌幅%'] = round(df['涨跌幅'].astype('float'), 2)

    # Per-company averages, each scaled down by 10000.
    companies = df['公司家数'].astype('float')
    df['平均成交量'] = round(df['总成交量(手)'].astype('float') / companies / 10000, 2)
    df['平均成交额'] = round((df['总成交额(万元)'].astype('float') / companies) / 10000, 2)

    # Drop the two-character prefix of the stock code.
    df['个股代码'] = df['个股代码'].apply(lambda x: x[2:])

    # Best-effort numeric conversions: on failure the raw values are kept.
    try:
        df['个股涨跌幅%'] = round(df['个股涨跌幅'].astype('float'), 2)
    except Exception:
        pass
    try:
        df['个股股价'] = round(df['个股股价'].astype('float'), 2)
    except Exception:
        pass
    try:
        df['公司家数'] = df['公司家数'].astype('int')
    except Exception:
        pass

    df2 = df[['板块', '涨跌幅%', '平均成交量', '平均成交额', '均价',
              '公司家数', 'label', '个股名称', '个股代码', '个股涨跌幅', '个股股价']].copy()
    df2 = df2.rename(columns={'个股名称': '代表个股', 'label': '板块代码'})

    # Remove invalid records (zero average price).
    df2 = df2[df2['均价'] != 0.0]

    # measurement -> (sort column, displayed column order)
    layouts = {
        "涨跌幅": ('涨跌幅%', ['板块', '涨跌幅%', '均价', '公司家数', '板块代码', '代表个股']),
        "平均价格": ('均价', ['板块', '均价', '涨跌幅%', '公司家数', '板块代码', '代表个股']),
        "公司家数": ('公司家数', ['板块', '公司家数', '均价', '涨跌幅%', '板块代码', '代表个股']),
    }
    sort_col, cols = layouts[comp]
    # Non-in-place sort returns a new frame instead of mutating a slice of df2.
    df3 = df2[cols].sort_values(by=[sort_col], ascending=False)
    df3 = df3.reset_index(drop=True)

    # Display alignment options (process-wide pandas settings).
    import pandas as pd
    pd.set_option('display.max_columns', 1000)
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 1000)
    pd.set_option('display.unicode.ambiguous_as_wide', True)
    pd.set_option('display.unicode.east_asian_width', True)

    indtag = "证监会行业" if category == "行业" else category

    # Nothing left to rank.
    if len(df3) == 0:
        print(" #Warning(sector_rank_china):data source tentatively unavailable for", comp, category)
        print(" Possible reason: data source is self-updating.")
        print(" Solution: have a breath of fresh air and try later.")
        return

    # 1-based ranking, also exposed as the first column.
    df3.index = df3.index + 1
    df3.insert(0, '序号', df3.index)

    df4 = df3.head(num) if num > 0 else df3.tail(-num)

    titletxt = "中国股票市场:板块" + comp + "排行榜(按照" + indtag + "分类)"
    import datetime
    stoday = datetime.date.today()
    footnote1 = "注:代表个股是指板块中涨幅最高或跌幅最低的股票\n"
    footnote2 = "板块总数" + str(len(df)) + ",数据来源:新浪财经," + str(stoday)
    footnote = footnote1 + footnote2

    df_display_CSS(df4, titletxt=titletxt, footnote=footnote, facecolor='papayawhip', decimals=2,
                   first_col_align='center', second_col_align='left',
                   last_col_align='left', other_col_align='right',
                   titile_font_size='16px', heading_font_size='15px',
                   data_font_size='15px')

    return df3
413
-
414
- #==============================================================================
415
if __name__ == '__main__':
    # Scratch inputs for interactive runs. The earlier candidate
    # sector="new_dlhy" was overwritten, so only the final value is kept.
    sector = "xyz"
    num = 10
420
-
421
def sector_detail_sina(sector="new_dlhy",indicator="涨跌幅",rank=5):
    """
    Thin wrapper around sector_detail_china.

    Parameters:
        sector: sector code (passed on as `ticker`).
        indicator: measurement to rank by (passed on as `indicator`).
        rank: number of rows to show (passed on as `rank`).

    Returns:
        Whatever sector_detail_china returns.
    """
    # sector_detail_china takes (ticker, indicator, rank); the former keyword
    # names sector=/comp=/num= no longer exist there and raised TypeError.
    df = sector_detail_china(ticker=sector, indicator=indicator, rank=rank)
    return df
427
-
428
-
429
- #def sector_detail_china(sector="new_dlhy",comp="涨跌幅",num=10):
430
def sector_detail_china(ticker="new_dlhy",indicator="涨跌幅",rank=10):
    """
    Rank the member stocks of one sector by a measurement, descending.

    Parameters:
        ticker: sector code (the 'label' value of akshare's stock_sector_spot).
        indicator: one of "涨跌幅", "换手率", "收盘价", "市盈率", "市净率",
            "总市值", "流通市值".
        rank: a positive value shows the top rows; a negative value shows
            the bottom rows.

    Returns:
        The displayed rows as a DataFrame, or None when the measurement or
        sector code is unsupported or the data source fails.
    """
    sector = ticker
    comp = indicator
    num = rank

    # Validate the measurement before touching the network.
    complist = ["涨跌幅", "换手率", "收盘价", "市盈率", "市净率", "总市值", "流通市值"]
    if comp not in complist:
        print(" #Error(sector_detail_china): unsupported measurement", comp)
        print(" Supported measurements:", complist)
        return None

    # Find the sector name by searching every sectoring method for the code.
    import akshare as ak
    indicatorlist = ["新浪行业", "概念", "地域", "启明星行业", "行业"]
    sector_name = ''
    for i in indicatorlist:
        dfi = ak.stock_sector_spot(indicator=i)

        # Strip blanks, otherwise matching fails easily.
        dfi['板块'] = dfi['板块'].apply(lambda x: x.strip())
        dfi['label'] = dfi['label'].apply(lambda x: x.strip())

        names = list(dfi[dfi['label'] == sector]['板块'])
        if names:
            sector_name = names[0]
            break

    # Sector code not found under any sectoring method.
    if sector_name == '':
        print(" #Error(sector_detail_china): unsupported sector code", sector)
        return

    # Member stocks of the sector.
    try:
        df = ak.stock_sector_detail(sector=sector)
    except Exception:
        print(" #Error(sector_detail_china): data source tentatively unavailable for", sector)
        print(" Possible reason: data source is self-updating.")
        print(" Solution: have a breath of fresh air and try later.")
        return None

    df.dropna(inplace=True)
    df['个股代码'] = df['code']
    df['个股名称'] = df['name']
    df['涨跌幅%'] = round(df['changepercent'].astype('float'), 2)
    df['收盘价'] = round(df['settlement'].astype('float'), 2)
    # Volume, amount and both market values are scaled down by 10000.
    df['成交量'] = round(df['volume'].astype('float') / 10000, 2)
    df['成交额'] = round(df['amount'].astype('float') / 10000, 2)
    df['市盈率'] = round(df['per'].astype('float'), 2)
    df['市净率'] = round(df['pb'].astype('float'), 2)
    df['总市值'] = round(df['mktcap'].astype('float') / 10000, 2)
    df['流通市值'] = round(df['nmc'].astype('float') / 10000, 2)
    df['换手率%'] = round(df['turnoverratio'].astype('float'), 2)

    # Remove invalid records: any zero in price, market values or P/E.
    valid = (df['收盘价'] != 0) & (df['流通市值'] != 0) \
        & (df['总市值'] != 0) & (df['市盈率'] != 0)
    df = df[valid]

    df2 = df[['个股代码', '个股名称', '涨跌幅%', '收盘价', '成交量', '成交额',
              '市盈率', '市净率', '换手率%', '总市值', '流通市值']].copy()

    # measurement -> (sort column, displayed column order)
    layouts = {
        "涨跌幅": ('涨跌幅%', ['个股名称', '个股代码', '涨跌幅%', '换手率%', '收盘价', '市盈率', '市净率', '流通市值']),
        "换手率": ('换手率%', ['个股名称', '个股代码', '换手率%', '涨跌幅%', '收盘价', '市盈率', '市净率', '流通市值']),
        "收盘价": ('收盘价', ['个股名称', '个股代码', '收盘价', '换手率%', '涨跌幅%', '市盈率', '市净率', '流通市值']),
        "市盈率": ('市盈率', ['个股名称', '个股代码', '市盈率', '市净率', '收盘价', '换手率%', '涨跌幅%', '流通市值']),
        "市净率": ('市净率', ['个股名称', '个股代码', '市净率', '市盈率', '收盘价', '换手率%', '涨跌幅%', '流通市值']),
        "流通市值": ('流通市值', ['个股名称', '个股代码', '流通市值', '总市值', '市净率', '市盈率', '收盘价', '换手率%', '涨跌幅%']),
        "总市值": ('总市值', ['个股名称', '个股代码', '总市值', '流通市值', '市净率', '市盈率', '收盘价', '换手率%', '涨跌幅%']),
    }
    sort_col, cols = layouts[comp]
    # Non-in-place sort returns a new frame instead of mutating a slice of df2.
    df3 = df2[cols].sort_values(by=[sort_col], ascending=False)
    df3 = df3.reset_index(drop=True)

    # Display alignment options (process-wide pandas settings).
    import pandas as pd
    pd.set_option('display.max_columns', 1000)
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 1000)
    pd.set_option('display.unicode.ambiguous_as_wide', True)
    pd.set_option('display.unicode.east_asian_width', True)

    # 1-based ranking, also exposed as the first column.
    df3.index = df3.index + 1
    df3.insert(0, '序号', df3.index)

    df4 = df3.head(num) if num > 0 else df3.tail(-num)

    titletxt = "中国股票市场:" + sector_name + "板块,成份股排行榜(基于" + comp + ")"

    import datetime
    stoday = datetime.date.today()
    if "流通市值" in cols:
        footnote1 = "市值单位:亿元,板块成份股:" + str(len(df)) + '\n'
    else:
        footnote1 = "板块成份股:" + str(len(df)) + ','
    footnote2 = "数据来源:新浪财经," + str(stoday)
    footnote = footnote1 + footnote2

    df_display_CSS(df4, titletxt=titletxt, footnote=footnote, facecolor='papayawhip', decimals=2,
                   first_col_align='center', second_col_align='left',
                   last_col_align='right', other_col_align='right',
                   titile_font_size='16px', heading_font_size='15px',
                   data_font_size='15px')

    return df4
587
-
588
- #==============================================================================
589
if __name__ == '__main__':
    # Scratch inputs for interactive runs. Earlier candidate tickers and
    # sector codes were overwritten, so only the final pair is kept.
    ticker = '000661.SZ'
    sector = "gn_swym"
598
-
599
def sector_position_sina(ticker,sector="new_dlhy",return_result=False):
    """
    Thin wrapper around sector_position_china.

    The analysis always runs; its result is handed back only when
    return_result is true, otherwise None is returned.
    """
    result = sector_position_china(ticker=ticker, sector=sector)
    return result if return_result else None
609
-
610
def sector_position_china(ticker,sector="new_dlhy"):
    """
    Show where one stock stands within a sector, measure by measure.

    Parameters:
        ticker: stock code; only its first six characters are matched
            against the sector's member codes.
        sector: sector code (the 'label' value of akshare's stock_sector_spot).

    Returns:
        A tuple (df, compdf): the sector's member-stock DataFrame and the
        per-measure position table; or None when the sector code is unknown,
        the member list is unavailable, or the stock is not in the sector.
    """
    ticker1 = ticker[:6]

    import akshare as ak
    import numpy as np
    import pandas as pd
    from scipy.stats import percentileofscore

    # Find the sector name and the sectoring method that defines the code.
    indicatorlist = ["新浪行业", "概念", "地域", "启明星行业", "行业"]
    sector_name = ''
    indicator = ''
    for i in indicatorlist:
        dfi = ak.stock_sector_spot(indicator=i)

        # Strip blanks, otherwise matching fails easily.
        dfi['板块'] = dfi['板块'].apply(lambda x: x.strip())
        dfi['label'] = dfi['label'].apply(lambda x: x.strip())

        names = list(dfi[dfi['label'] == sector]['板块'])
        if names:
            sector_name = names[0]
            indicator = i
            break

    # Sector code not found under any sectoring method.
    if sector_name == '':
        print(" #Warning(sector_position_china): unsupported sector code", sector)
        return None

    # Member stocks of the sector.
    try:
        # Per the original author, the 启明星行业 classification has no member details.
        df = ak.stock_sector_detail(sector=sector)
    except Exception:
        print(" #Warning(sector_position_china): sector detail not available for", sector, 'by', indicator)
        if indicator != "启明星行业":
            print(" Possible reason: data source is self-updating.")
            print(" Solution: have a breath of fresh air and try later.")
        return None

    # Clean raw data: columns may mix numbers and strings, so force numeric.
    df['changepercent'] = round(df['changepercent'].astype('float'), 2)
    df['turnoverratio'] = round(df['turnoverratio'].astype('float'), 2)
    df['settlement'] = round(df['settlement'].astype('float'), 2)
    df['per'] = round(df['per'].astype('float'), 2)
    df['pb'] = round(df['pb'].astype('float'), 2)
    df['nmc'] = round(df['nmc'].astype('int') / 10000, 2)
    df['mktcap'] = round(df['mktcap'].astype('int') / 10000, 2)

    # The stock must be a member of the sector.
    sdf = df[df['code'] == ticker1]
    if len(sdf) == 0:
        print(" #Warning(sector_position_china): retrieving", ticker, "failed in sector", sector, sector_name)
        print(" Solution: make sure stock code correct, try later if network is slow")
        return None
    sname = list(sdf['name'])[0]

    # Measures to compare, with their display names.
    complist = ['changepercent', 'turnoverratio', 'settlement', 'per', 'pb', 'nmc', 'mktcap']
    compnames = ['涨跌幅%', '换手率%', '收盘价(元)', '市盈率', '市净率', '流通市值(亿元)', '总市值(亿元)']
    # Columns whose sector minimum is shown as '--' when it is zero.
    vminlist = ['settlement', 'per', 'pb', 'nmc', 'mktcap']
    columns = ['指标名称', '指标数值', '板块排名', '板块分位数%', '板块中位数', '板块最小值', '板块最大值']

    # Collect one row per measure and build the table once; DataFrame.append
    # no longer exists in pandas 2.x.
    rows = []
    for c, cname in zip(complist, compnames):
        v = list(sdf[c])[0]
        vlist = sorted(df[c])  # ascending

        vmin = round(min(vlist), 2)
        if vmin == 0.00 and c in vminlist:
            vmin = '--'
        vmax = round(max(vlist), 2)
        vmedian = round(np.median(vlist), 2)

        # Rank counted from the top: number of values at or above v.
        pos = vlist.index(v)
        sector_rank = str(len(vlist) - pos)

        pct = percentileofscore(vlist, v)

        rows.append({'指标名称': cname,
                     '指标数值': v, '板块排名': sector_rank, '板块分位数%': pct, '板块中位数': vmedian,
                     '板块最小值': vmin, '板块最大值': vmax})

    compdf = pd.DataFrame(rows, columns=columns)

    if indicator == "行业":
        indicator = "证监会行业"

    titletxt = "\n上市公司地位分析:" + sname + "," + sector_name + "行业/板块(" + indicator + "分类)"
    import datetime
    stoday = datetime.date.today()
    footnote = "成分股总数:" + str(len(df)) + ",数据来源:新浪财经," + str(stoday)

    df_display_CSS(compdf, titletxt=titletxt, footnote=footnote, facecolor='papayawhip', decimals=2,
                   first_col_align='left', second_col_align='right',
                   last_col_align='right', other_col_align='right',
                   titile_font_size='16px', heading_font_size='15px',
                   data_font_size='15px')

    return df, compdf
748
-
749
-
750
- #==============================================================================
751
-
752
def invest_concept_china(num=10,max_sleep=30):
    """
    Deprecated! Collect the stocks of every Sina "investment concept" sector
    and rank the stocks by how many concepts each one belongs to.

    Source page: http://finance.sina.com.cn/stock/sl/#qmxindustry_1

    Caution: the data source applies anti-crawling measures, so the download
    loop may be cut off before it finishes.

    Parameters:
        num: not used by the function; kept for backward compatibility.
        max_sleep: upper bound, in seconds, of the random pause inserted
            after each request.

    Returns:
        A DataFrame merging the per-stock concept counts (plus a 'sectors'
        column holding the stringified concept list) with the first detail
        record of each stock, or None when no concept could be downloaded.
    """
    import random
    import time

    import akshare as ak
    import pandas as pd

    print("\nWarning: This function might cause your IP address banned by data source!")
    print("Searching stocks with investment concepts in China, it may take long time ...")

    # List of investment concepts
    cdf = ak.stock_sector_spot(indicator="概念")

    # Strip blanks, otherwise later matching easily fails
    cdf['板块']=cdf['板块'].apply(lambda x: x.strip())
    cdf['label']=cdf['label'].apply(lambda x: x.strip())

    cdf.sort_values(by=['label'],ascending=True,inplace=True)
    clist=list(cdf['label'])
    cnames=list(cdf['板块'])
    cnum=len(clist)

    # randint needs an upper bound of at least 1
    pause_cap=max(1,int(max_sleep))

    frames=[]
    for i,(c,cname) in enumerate(zip(clist,cnames),start=1):
        print("...Searching for conceptual sector",c,cname,end='')
        try:
            sdf = ak.stock_sector_detail(c)
            # The original called the list (cnames(...)), which always raised
            # and sent every concept into the failure branch.
            sdf['板块']=cname
            frames.append(sdf)
            print(', found.')
        except Exception:
            print(', failed:-(')

        # Wait a while to reduce the chance of being blocked
        time.sleep(random.randint(1,pause_cap))

        if i % 20 == 0:
            print(int(i/cnum*100),'\b%',end=' ')
    print("...Searching completed.")

    totaldf=pd.concat(frames,ignore_index=True) if frames else pd.DataFrame()
    if len(totaldf) == 0:
        print(" #Error(invest_concept_china): data source tentatively banned your access:-(")
        print(" Solutions:1) try a bit later, or 2) switch to another IP address.")
        return None

    # Count, per stock name, the number of concept rows
    totaldfrank = totaldf.groupby('name')[['板块','code']].count()
    totaldfrank.sort_values(by=['板块','code'],ascending=[False,True],inplace=True)
    totaldfrank['name']=totaldfrank.index
    totaldfrank.reset_index(drop=True,inplace=True)

    # Attach the list of concepts held by each stock
    sector_map=totaldf.groupby('name')['板块'].apply(lambda s: str(list(s)))
    totaldfrank['sectors']=totaldfrank['name'].map(sector_map)

    # Combine the ranking with one detail record per stock code
    totaldf2=totaldf.drop('板块',axis=1)
    totaldf2.drop_duplicates(subset=['code'],keep='first',inplace=True)
    finaldf = pd.merge(totaldfrank,totaldf2,how='inner',on='name')

    return finaldf
825
-
826
-
827
- #==============================================================================
828
def industry_sw_list_all():
    """
    Download the codes of all Shenwan indexes; the list is fetched anew on
    every call, so it is always up to date.

    Returns:
        DataFrame with columns 'type', 'code', 'name'. 'type' is one of
        F (market representation), 1 (level-1 industry), 2 (level-2
        industry), S (style), B (broad style), C (Jinchuang) for the
        real-time categories, and 3 for level-3 industries.
    """
    import pandas as pd
    import akshare as ak

    symboltypes=["市场表征", "一级行业", "二级行业", "风格指数","大类风格指数","金创指数"]
    indextypecodes=['F','1','2','S','B','C']

    frames=[]
    for typecode,s in zip(indextypecodes,symboltypes):
        try:
            # The original flags this endpoint as currently problematic
            dft = ak.index_realtime_sw(symbol=s)
        except Exception:
            continue

        dft['指数类别代码']=typecode
        dft['指数类别名称']=s
        frames.append(dft)

    if frames:
        industry=pd.concat(frames,ignore_index=True)
        # Copy before renaming so the rename does not act on a slice
        industry2=industry[['指数类别代码','指数代码','指数名称']].copy()
        industry2.columns=['type','code','name']
    else:
        # Every real-time category failed: continue with level-3 data only
        # instead of raising KeyError on the missing columns
        industry2=pd.DataFrame(columns=['type','code','name'])

    # Level-3 industry codes and names
    df3 = ak.sw_index_third_info()
    df3['type']='3'
    df3['code']=df3['行业代码'].apply(lambda x:x[:6])
    df3['name']=df3['行业名称']
    industry3=df3[['type','code','name']]

    industry_all=pd.concat([industry2,industry3],ignore_index=True)
    # Remove fully duplicated rows
    industry_all.drop_duplicates(inplace=True)

    return industry_all
877
-
878
# Manual check; executes only when this file is run as a script.
if __name__=='__main__':
    idf=industry_sw_list()
    idf=industry_sw_list_all()

#==============================================================================
# Manual check: download the full list and dump it in list-literal form.
# NOTE(review): industry_sw_list_print is defined below this call, so a
# top-to-bottom script run would reach the call first; confirm these demos
# are meant for interactive, cell-by-cell use.
if __name__=='__main__':
    idf=industry_sw_list_all()

    industry_sw_list_print(idf,numberPerLine=3)
887
-
888
def industry_sw_list_print(idf,numberPerLine=3):
    """
    Print the rows of idf as Python list literals, numberPerLine per line.

    The output is meant to be refreshed periodically and pasted into
    industry_sw_list().

    Parameters:
        idf: DataFrame with string columns 'type', 'code' and 'name'.
        numberPerLine: number of entries printed before a line break.
    """
    for seq,(_,rec) in enumerate(idf.iterrows(),start=1):
        entry="['"+rec['type']+"','"+rec['code']+"','"+rec['name']+"']"
        print(entry,end=',')
        # Break the line after every numberPerLine entries
        if seq % numberPerLine == 0:
            print()

    return
903
-
904
- #==============================================================================
905
-
906
def display_industry_sw(sw_level='1',numberPerLine=4,colalign='left'):
    """
    Print the Shenwan industry list of one category as "name(code)" entries.

    Thin wrapper around print_industry_sw that validates the category first.

    Parameters:
        sw_level: category code, one of '1', '2', '3', 'F', 'S', 'B'.
        numberPerLine: entries per output line, forwarded unchanged.
        colalign: column alignment, forwarded unchanged.

    Returns:
        None. For an unsupported sw_level a warning is printed and nothing
        else happens.
    """
    # 'C' was already excluded from the supported levels in the original
    sw_level_list=['1','2','3','F','S','B']

    if sw_level not in sw_level_list:
        print(f" #Warning(display_industry_sw): no such level in Shenwan system {sw_level}")
        print(f" Supported Shenwan system: {sw_level_list}")
        # The original fell through here and crashed on an unbound index
        return

    # Level codes and category codes are the same list, so pass it through
    print_industry_sw(itype=sw_level,numberPerLine=numberPerLine,colalign=colalign)

    return
926
-
927
-
928
-
929
# Manual check for print_industry_sw; executes only when run as a script.
if __name__=='__main__':
    itype='1'
    numberPerLine=5
    colalign='left'

    print_industry_sw(itype='1',numberPerLine=5,colalign='right')
935
-
936
def print_industry_sw(itype='1',numberPerLine=5,colalign='left'):
    """
    Print the Shenwan industry list of one category as "name(code.SW)".

    Parameters:
        itype: category code, default '1'.
            F=market representation, 1=level-1 industry, 2=level-2 industry,
            3=level-3 industry, S=style index, B=broad style, C=Jinchuang.
        numberPerLine: entries per output line, default 5. It is overridden
            to 4 for itype '2' and to 3 for itype '3'.
        colalign: column alignment, default 'left'.

    Returns:
        None. An unknown itype prints a warning and returns.

    Example:
        print_industry_sw(colalign='left')
    """
    symboltypes=["市场表征", "一级行业", "二级行业", "三级行业", "风格指数", "大类风格指数","金创指数"]
    indextypecodes=['F','1','2','3','S','B','C']
    if itype not in indextypecodes:
        print(" #Warning(print_industry_sw): unsupported industry type",itype)
        print(" Supported industry types:",indextypecodes)
        return
    iname=symboltypes[indextypecodes.index(itype)]

    df=industry_sw_list()
    # Work on a copy so the new column is not written onto a filtered slice
    df1=df[df['type']==itype].copy()
    # Column-wise concatenation also copes with an empty selection
    df1['name_code']=df1['name']+'('+df1['code']+'.SW'+')'

    ilist=list(df1['name_code'])
    print("\n*** 申万行业分类:"+iname+",共计"+str(len(ilist))+'个行业(板块)')

    if itype=='2': numberPerLine=4
    if itype=='3': numberPerLine=3

    printInLine_md(ilist,numberPerLine=numberPerLine,colalign=colalign)

    return
966
-
967
- #==============================================================================
968
def display_industry_component_sw(industry,numberPerLine=5,colalign='left'):
    """
    Print the component stocks of a Shenwan industry as "name(code)".

    Wrapper that dispatches on the form of the argument.

    Parameters:
        industry: Shenwan industry name or code; anything from the first
            '.' onward is ignored.
        numberPerLine: entries per output line.
        colalign: column alignment.
    """
    key,_,_=industry.partition('.')

    # All-digit keys are treated as codes, anything else as a name
    printer=print_industry_component_sw2 if key.isdigit() else print_industry_component_sw
    printer(key,numberPerLine=numberPerLine,colalign=colalign)

    return
980
-
981
-
982
# Manual check for print_industry_component_sw; executes only when run as a
# script. The successive iname assignments overwrite each other, so only the
# last one is actually used.
if __name__=='__main__':
    iname='食品饮料'
    iname='银行'
    iname='汽车'
    iname='高价股指数'
    iname='申万A指'
    iname='大类风格-医药医疗'

    numberPerLine=5
    colalign='right'

    print_industry_component_sw(iname,numberPerLine=5,colalign='right')
994
-
995
def print_industry_component_sw(iname,numberPerLine=5,colalign='left', \
                                printout=True,return_result=False):
    """
    ===========================================================================
    Print the component stocks of a Shenwan industry as "name(code)".

    Parameters:
        iname: Shenwan industry name.
        numberPerLine: entries per output line, default 5.
        colalign: column alignment, default 'left'.
        printout: whether to print the table, default True.
        return_result: whether to return the list, default False.

    Returns:
        The list of "name(code)" strings when return_result is True (an
        empty list on failure), otherwise None.

    Example:
        print_industry_component_sw(iname="白酒Ⅲ")
    """
    # Value handed back on every failure path
    nothing=[] if return_result else None

    try:
        icode=industry_sw_code(iname)
    except Exception:
        print(" #Warning(print_industry_component_sw): failed to find index name for",iname)
        return nothing

    # industry_sw_code signals "not found" with None; '' is kept for safety
    if not icode:
        print(" #Warning(print_industry_component_sw): relevent index code not found for",iname)
        return nothing

    clist,cdf=industry_stock_sw(icode,top=1000)
    if clist is None or cdf is None:
        # Warn in both modes, not only when a result was requested
        print(" #Warning(print_industry_component_sw): no component stock found for",iname)
        return nothing

    cdf['icode']=cdf['证券代码']

    # Blank out cells equal to the string 'None', then drop incomplete rows
    cdf=cdf.mask(cdf.eq('None')).dropna()

    # Combine security name and code; column-wise so an empty frame is fine
    cdf['name_code']=cdf['证券名称']+'('+cdf['icode']+')'
    ilist=list(cdf['name_code'])

    if printout:
        # Title
        import datetime as dt; stoday=dt.date.today()

        titletxt=iname+"("+icode+")行业/板块成分股:计"+str(len(ilist))+'只,按行业指数权重降序排列,'+str(stoday)
        print("\n"+titletxt,end='')
        # Table
        printInLine_md(ilist,numberPerLine=numberPerLine,colalign=colalign)

    if return_result:
        return ilist
    else:
        return
1056
-
1057
- #==============================================================================
1058
# Manual check for print_industry_component_sw2; executes only when run as a
# script.
if __name__=='__main__':
    icode='850831.SW'
    numberPerLine=5
    colalign='right'

    print_industry_component_sw2(icode,numberPerLine=5,colalign='right')
1064
-
1065
def print_industry_component_sw2(icode,numberPerLine=5,colalign='left'):
    """
    Print the component stocks of a Shenwan industry as "name(code)".

    Parameters:
        icode: Shenwan industry code (level 1, 2 or 3); anything from the
            first '.' onward is dropped.
        numberPerLine: entries per output line.
        colalign: column alignment.
    """
    code=icode.split('.')[0]
    sector_name=industry_sw_name(code)

    _,members=industry_stock_sw(code,top=1000)
    if members is None:
        print(" #Error(print_industry_component_sw2): failed to retrieve industry for",code)
        print(" Solution: make sure the industry code correct")
        print(" If the code is correct, upgrade akshare, restart jupyter and try again")

        return

    members['icode']=members['证券代码']

    # Blank out cells equal to the string 'None', then drop incomplete rows
    members=members.mask(members.eq('None')).dropna()
    members['name_code']=members.apply(lambda row: row['证券名称']+'('+row['icode']+')',axis=1)
    labels=list(members['name_code'])

    import datetime as dt
    today_str=str(dt.date.today())
    header="\n*** "+sector_name+'行业(板块)包括的股票:共计'+str(len(labels))+'只,'+today_str+"统计"
    print(header)

    printInLine_md(labels,numberPerLine=numberPerLine,colalign=colalign)

    return
1096
-
1097
-
1098
- #==============================================================================
1099
# Manual check for industry_sw_code; executes only when run as a script.
# iname is assigned here but the call below uses a literal name instead.
if __name__=='__main__':
    iname='大类风格--医药医疗'

    industry_sw_code('光伏设备')
1103
-
1104
def industry_sw_code(iname):
    """
    Translate a Shenwan index name into its index code.

    Parameters:
        iname: index name, matched exactly against the 'name' column of
            industry_sw_list().

    Returns:
        The code of the first match with '.SW' appended, or None when the
        lookup fails.
    """
    catalog=industry_sw_list()

    try:
        hits=catalog[catalog['name']==iname]
        code=hits['code'].values[0]
    except:
        # Not found
        return None
    else:
        return code+'.SW'
1120
-
1121
# Manual check for industry_sw_code; executes only when run as a script.
if __name__=='__main__':
    iname='申万创业'
    industry_sw_code(iname)
1124
-
1125
- #==============================================================================
1126
def industry_sw_codes(inamelist):
    """
    Translate Shenwan index name(s) into a list of index codes.

    Parameters:
        inamelist: one index name (str) or a list of names. An all-digit
            entry is taken to be a code already and is passed through.

    Returns:
        - list input: list of codes, names resolved through
          industry_sw_code and digit entries kept as given; None if the list
          is empty or any entry cannot be resolved.
        - str input: a one-element list with the resolved code; the string
          itself (not a list) when it is all digits; None when it cannot be
          resolved.
        - any other type: an empty list.
    """
    # The original also called industry_sw_list() here without using the
    # result; industry_sw_code performs its own lookup.

    if isinstance(inamelist,str):
        icode=industry_sw_code(inamelist)
        if icode is not None:
            return [icode]
        if inamelist.isdigit():
            # Kept as in the original: a bare code string comes back as-is
            return inamelist
        print(" #Warning(industries_sw_code): industry code not found for",inamelist)
        return None

    icodelist=[]
    if isinstance(inamelist,list):
        if len(inamelist) == 0:
            print(" #Warning(industries_sw_code): no industry code found in for",inamelist)
            return None

        for i in inamelist:
            icode=industry_sw_code(i)
            if icode is not None:
                icodelist.append(icode)
            elif i.isdigit():
                icodelist.append(i)
            else:
                print(" #Warning(industries_sw_code): industry code not found",i)
                return None

    return icodelist
1163
-
1164
# Manual check for industry_sw_codes with a single name and with a list of
# names; executes only when run as a script.
if __name__=='__main__':
    inamelist='申万创业'
    industry_sw_codes(inamelist)

    inamelist=['申万创业','申万投资','申万制造','申万消费']
    industry_sw_codes(inamelist)
#==============================================================================
# Sample argument values matching the parameters of industry_ranking_sw.
if __name__=='__main__':
    start='2018-1-1'
    end='2022-10-31'
    measure='Exp Ret%'
    itype='1'
    graph=True
    axisamp=0.8
1178
-
1179
def industry_ranking_sw(start,end,measure='Exp Ret%', \
                        itype='1',period="day", \
                        graph=True,axisamp=0.8):
    """
    Full pipeline: walk through every Shenwan index of one category, compute
    a performance measure for each, then collect, sort and chart the results.

    Parameters:
        start, end: observation period; validated with check_period.
        measure: name of the all_calculate result column to rank by.
        itype: category code matched against industry_sw_list()['type'].
            Stock indexes: F (representation), 1/2/3 (industry levels),
            S (style), B (broad style), C (Jinchuang).
            Fund indexes: J1/J2/J3 (base levels), JF (special).
        period: documented as one of {"day", "week", "month"}; kept for
            interface compatibility, the downloads always request "day".
        graph: draw the horizontal bar chart when True (default).
        axisamp: forwarded to plot_barh.

    Returns:
        DataFrame indexed by index name, sorted by 'value' ascending, with
        columns date, ticker, start, end, item, value. None when the period
        is invalid, the measure is not available, or no index produced data.
    """
    import datetime

    import akshare as ak
    import pandas as pd

    # Check the date period
    result,_,_=check_period(start,end)
    if not result:
        print(" #Warning(industry_ranking_sw): invalid date period",start,end)
        return None

    # Index codes of the requested category
    idf=industry_sw_list()
    ilist=list(idf[idf['type']==itype]['code'])

    print("\nSearching industry prices, it may take great time, please wait ...")

    price_cols=['ticker','Open','High','Low','Close','Adj Close','Volume','Amount']
    records=[]
    fail_list=[]
    iend=None
    for i in ilist:

        print(" Processing index",i,"\b, please wait ...")
        # Fetch prices: stock-index endpoint first, fund-index endpoint next
        try:
            dft = ak.index_hist_sw(symbol=i,period="day")
        except Exception:
            try:
                dft = ak.index_hist_fund_sw(symbol=i,period="day")
                # The fund data is mapped from its single close column
                dft['代码']=i
                dft['收盘']=dft['收盘指数']
                dft['开盘']=dft['收盘指数']
                dft['最高']=dft['收盘指数']
                dft['最低']=dft['收盘指数']
                dft['成交量']=0; dft['成交额']=0
            except Exception:
                fail_list.append(i)
                continue

        dft['ticker']=dft['代码']
        dft['date']=pd.to_datetime(dft['日期'])
        dft.set_index('date',inplace=True)
        dft['Open']=dft['开盘']
        dft['High']=dft['最高']
        dft['Low']=dft['最低']
        dft['Close']=dft['收盘']
        dft['Adj Close']=dft['收盘']
        dft['Volume']=dft['成交量']
        dft['Amount']=dft['成交额']

        dft.sort_index(ascending=True,inplace=True)
        dft2=dft[price_cols]

        # Compute the measures; a None or empty result counts as a failure
        dft3=all_calculate(dft2,i,start,end)
        if dft3 is None or len(dft3)==0:
            fail_list.append(i)
            continue
        dft4=dft3.tail(1)

        # Record the latest value
        idate=pd.to_datetime(dft4.index.values[0])
        iend=idate.strftime('%Y-%m-%d')
        try:
            ivalue=round(dft4[measure].values[0],2)
        except Exception:
            print(" #Error(industry_ranking_sw): measure not supported",measure)
            return None
        records.append({'date':idate,'ticker':i,'start':start,'end':iend,'item':measure,'value':ivalue})

    df=pd.DataFrame(records,columns=['date','ticker','start','end','item','value'])
    if len(df) > 0:
        df.sort_values(by='value',ascending=True,inplace=True)
        df['name']=df['ticker'].apply(lambda x: industry_sw_name(x))
        df.set_index('name',inplace=True)

        if graph:
            colname='value'
            titletxt="行业/指数分析:业绩排名"
            today=datetime.date.today()
            footnote0=ectranslate(measure)+' ==>\n'
            footnote1='申万行业/指数分类,观察期:'+start+'至'+iend+'\n'
            footnote2="数据来源: 申万宏源, "+str(today)
            footnote=footnote0+footnote1+footnote2

            plot_barh(df,colname,titletxt,footnote,axisamp=axisamp)
    else:
        # Without this guard the footnote would reference an unset end date
        print(" #Warning(industry_ranking_sw): no data retrieved for industry category",itype)

    if len(fail_list) > 0:
        print(" Unable to retrieve",len(fail_list),"industry(ies) as follows:",end='')
        printInLine_md(fail_list,numberPerLine=min(10,len(fail_list)),colalign='left',font_size='16px')
        print('') # blank line

    if len(df) == 0:
        return None
    return df
1287
-
1288
# Manual check for industry_ranking_sw; executes only when run as a script.
if __name__=='__main__':
    start='2018-1-1'
    end='2022-10-31'
    measure='Exp Ret%'
    itype='1'
    graph=True
    axisamp=0.8

    df=industry_ranking_sw(start,end,measure='Exp Ret%',axisamp=0.8)
1297
-
1298
- #==============================================================================
1299
def industry_ranking_sw2(industrylist,start,end,measure='Exp Ret%', \
                         period="day", \
                         graph=True,axisamp=0.8):
    """
    Full pipeline: walk through the specified Shenwan indexes, compute a
    performance measure for each, then collect, sort and chart the results.

    Unlike industry_ranking_sw this is not limited to one category: any set
    of Shenwan indexes can be compared on one measure.

    Parameters:
        industrylist: list of Shenwan index codes (a single code string is
            also accepted); anything from the first '.' onward is dropped.
        start, end: observation period; validated with check_period.
        measure: name of the all_calculate result column to rank by.
        period: documented as one of {"day", "week", "month"}; kept for
            interface compatibility, the downloads always request "day".
        graph: draw the horizontal bar chart when True (default).
        axisamp: forwarded to plot_barh.

    Returns:
        DataFrame indexed by index name, sorted by 'value' ascending, with
        rows containing missing values removed. None when the period is
        invalid, the measure is not available, or no index produced data.
    """
    import datetime

    import akshare as ak
    import pandas as pd

    # A bare string would otherwise be iterated character by character
    if isinstance(industrylist,str):
        industrylist=[industrylist]
    ilist=[code.split('.')[0] for code in industrylist]

    # Check the date period
    result,_,_=check_period(start,end)
    if not result:
        print(" #Warning(industry_ranking_sw2): invalid date period",start,end)
        return None

    print("\nSearching industry prices, it may take great time, please wait ...")

    price_cols=['ticker','Open','High','Low','Close','Adj Close','Volume','Amount']
    records=[]
    iend=None
    for i in ilist:

        print(" Processing industry",i,"\b, please wait ...")
        # Fetch prices: stock-index endpoint first, fund-index endpoint next
        try:
            dft = ak.index_hist_sw(symbol=i,period="day")
        except Exception:
            try:
                dft = ak.index_hist_fund_sw(symbol=i,period="day")
                # The fund data is mapped from its single close column
                dft['代码']=i
                dft['收盘']=dft['收盘指数']
                dft['开盘']=dft['收盘指数']
                dft['最高']=dft['收盘指数']
                dft['最低']=dft['收盘指数']
                dft['成交量']=0; dft['成交额']=0
            except Exception:
                print(" #Warning(industry_ranking_sw2): index not found for",i)
                continue

        dft['ticker']=dft['代码']
        dft['date']=pd.to_datetime(dft['日期'])
        dft.set_index('date',inplace=True)
        dft['Open']=dft['开盘']
        dft['High']=dft['最高']
        dft['Low']=dft['最低']
        dft['Close']=dft['收盘']
        dft['Adj Close']=dft['收盘']
        dft['Volume']=dft['成交量']
        dft['Amount']=dft['成交额']

        dft.sort_index(ascending=True,inplace=True)
        dft2=dft[price_cols]

        # Compute the measures; skip indexes that yield nothing
        dft3=all_calculate(dft2,i,start,end)
        if dft3 is None or len(dft3)==0:
            print(" #Warning(industry_ranking_sw2): no calculation result for",i)
            continue
        dft4=dft3.tail(1)

        # Record the latest value
        idate=pd.to_datetime(dft4.index.values[0])
        iend=idate.strftime('%Y-%m-%d')
        try:
            ivalue=round(dft4[measure].values[0],2)
        except Exception:
            print(" #Error(industry_ranking_sw2): measure not supported",measure)
            return None
        records.append({'date':idate,'ticker':i,'start':start,'end':iend,'item':measure,'value':ivalue})

    df=pd.DataFrame(records,columns=['date','ticker','start','end','item','value'])
    if len(df) == 0:
        # Without this guard the footnote would reference an unset end date
        print(" #Warning(industry_ranking_sw2): no data retrieved for",ilist)
        return None

    df.sort_values(by='value',ascending=True,inplace=True)
    df['name']=df['ticker'].apply(lambda x: industry_sw_name(x))
    df.set_index('name',inplace=True)

    df.dropna(inplace=True)

    if graph:
        colname='value'
        titletxt="行业/指数分析:业绩排名"
        today=datetime.date.today()
        footnote0=ectranslate(measure)+' ==>\n'
        footnote1='申万行业/指数分类,观察期:'+start+'至'+iend+'\n'
        footnote2="数据来源: 申万宏源, "+str(today)
        footnote=footnote0+footnote1+footnote2

        plot_barh(df,colname,titletxt,footnote,axisamp=axisamp)

    return df
1401
- #==============================================================================
1402
# Sample argument values for the functions in this section; executes only
# when run as a script.
if __name__=='__main__':
    start='2018-1-1'
    end='2022-10-31'
    measure='Exp Ret%'
    itype='F'
    period="day"
    industry_list='all'
1409
-
1410
def get_industry_sw(itype='1',period="day",industry_list='all',max_sleep=30):
    """
    Walk through one category of Shenwan indexes and download their prices.

    Parameters:
        itype: category code.
            Stock indexes: F (representation), 1/2/3 (industry levels),
            S (style), B (broad style), C (Jinchuang), A (all categories).
            The original docstring also lists fund indexes J1/J2/J3 and JF,
            but the validation below does not accept them.
        period: documented as one of {"day", "week", "month"}; kept for
            interface compatibility, the downloads always request "day".
        industry_list: 'all' to use every index of category itype, or a
            list of index codes (a single code string is also accepted) to
            download only those. A '.SW'-style suffix is dropped.
        max_sleep: upper bound, in seconds, of the random pause after each
            successful download; values below 1 disable the pause.

    Returns:
        DataFrame indexed by date with columns ticker, Open, High, Low,
        Close, Adj Close, Volume, Amount for all downloaded indexes (empty
        if nothing could be downloaded), or None for an unsupported itype.
    """
    import random
    import time

    import akshare as ak
    import pandas as pd

    # Validate itype
    typelist=['F','1','2','3','S','B','C','A']
    if not (itype in typelist):
        print(" #Error(get_industry_sw): unsupported industry category",itype)
        print(" Supported industry category",typelist)
        print(" F: Featured, n-Level n industry, S-Styled, B- Big Styled, C- Financial Innovation, A-All (more time))")
        return None

    # Determine the index codes to download
    if isinstance(industry_list,str) and industry_list=='all':
        idf=industry_sw_list()

        if itype == 'A':
            ilist=list(idf['code'])
        else:
            ilist=list(idf[idf['type']==itype]['code'])
    else:
        if isinstance(industry_list,str):
            industry_list=[industry_list]
        # Same suffix handling as get_industry_sw2
        ilist=[code.split('.')[0] for code in industry_list]

    print(" Searching industry data, it takes time, please wait ...")
    steps=5 if len(ilist) <= 10 else 10

    price_cols=['ticker','Open','High','Low','Close','Adj Close','Volume','Amount']
    frames=[]
    fail_list=[]
    for i in ilist:
        # Fetch prices: stock-index endpoint first, fund-index endpoint next
        try:
            dft = ak.index_hist_sw(symbol=i,period="day")
        except Exception:
            try:
                dft = ak.index_hist_fund_sw(symbol=i,period="day")
                # The fund data is mapped from its single close column
                dft['代码']=i
                dft['收盘']=dft['收盘指数']
                dft['开盘']=dft['收盘指数']
                dft['最高']=dft['收盘指数']
                dft['最低']=dft['收盘指数']
                dft['成交量']=0; dft['成交额']=0
            except Exception:
                fail_list.append(i)
                continue

        dft['ticker']=dft['代码']
        dft['date']=pd.to_datetime(dft['日期'])
        dft.set_index('date',inplace=True)
        dft['Open']=dft['开盘']
        dft['High']=dft['最高']
        dft['Low']=dft['最低']
        dft['Close']=dft['收盘']
        dft['Adj Close']=dft['收盘']
        dft['Volume']=dft['成交量']
        dft['Amount']=dft['成交额']

        dft.sort_index(ascending=True,inplace=True)
        frames.append(dft[price_cols])

        print_progress_percent2(i,ilist,steps=steps,leading_blanks=4)

        # Random pause in an attempt to avoid anti-crawling blocks
        if max_sleep >= 1:
            time.sleep(random.randint(1,int(max_sleep)))

    # One concatenation instead of growing the frame inside the loop
    df=pd.concat(frames) if frames else pd.DataFrame()

    if len(df)>0:
        print(" Successfully retrieved",len(df),"records in",len(ilist)-len(fail_list),"industries")

    if len(fail_list) > 0:
        print(" Failed to retrieve",len(fail_list),"industry(ies) as follows:")
        printInLine_md(fail_list,numberPerLine=min(10,len(fail_list)),colalign='left',font_size='16px')

    return df
1515
-
1516
-
1517
# Manual check for get_industry_sw; executes only when run as a script.
if __name__=='__main__':
    df=get_industry_sw('F')

#==============================================================================
# Sample argument values, including a code list in the form accepted by
# get_industry_sw2.
if __name__=='__main__':
    start='2018-1-1'
    end='2022-10-31'
    measure='Exp Ret%'
    period="day"
    industry_list=['850831.SW','801785.SW','801737.SW','801194.SW',
                   '801784.SW','801783.SW','801782.SW']
1528
-
1529
def get_industry_sw2(industry_list,period="day",max_sleep=30):
    """
    Walk through the specified Shenwan indexes and download their prices.

    Parameters:
        industry_list: list of index codes (a single code string is also
            accepted); anything from the first '.' onward is dropped.
        period: documented as one of {"day", "week", "month"}; kept for
            interface compatibility, the downloads always request "day".
        max_sleep: upper bound, in seconds, of the random pause after each
            successful download; values below 1 disable the pause.

    Returns:
        DataFrame indexed by date with columns ticker, Open, High, Low,
        Close, Adj Close, Volume, Amount for all downloaded indexes (empty
        if nothing could be downloaded).
    """
    import random
    import time

    import akshare as ak
    import pandas as pd

    # A bare string would otherwise be iterated character by character
    if isinstance(industry_list,str):
        industry_list=[industry_list]
    ilist=[code.split('.')[0] for code in industry_list]

    print(" Searching industry information, it takes time, please wait ...")
    total=len(ilist)
    steps=5 if total <= 10 else 10

    price_cols=['ticker','Open','High','Low','Close','Adj Close','Volume','Amount']
    frames=[]
    fail_list=[]
    for current,i in enumerate(ilist):
        # Fetch prices: stock-index endpoint first, fund-index endpoint next
        try:
            dft = ak.index_hist_sw(symbol=i,period="day")
        except Exception:
            try:
                dft = ak.index_hist_fund_sw(symbol=i,period="day")
                # The fund data is mapped from its single close column
                dft['代码']=i
                dft['收盘']=dft['收盘指数']
                dft['开盘']=dft['收盘指数']
                dft['最高']=dft['收盘指数']
                dft['最低']=dft['收盘指数']
                dft['成交量']=0; dft['成交额']=0
            except Exception:
                fail_list.append(i)
                continue

        dft['ticker']=dft['代码']
        dft['date']=pd.to_datetime(dft['日期'])
        dft.set_index('date',inplace=True)
        dft['Open']=dft['开盘']
        dft['High']=dft['最高']
        dft['Low']=dft['最低']
        dft['Close']=dft['收盘']
        dft['Adj Close']=dft['收盘']
        dft['Volume']=dft['成交量']
        dft['Amount']=dft['成交额']

        dft.sort_index(ascending=True,inplace=True)
        frames.append(dft[price_cols])

        print_progress_percent(current,total,steps=steps,leading_blanks=2)

        # Random pause in an attempt to avoid anti-crawling blocks
        if max_sleep >= 1:
            time.sleep(random.randint(1,int(max_sleep)))

    # One concatenation instead of growing the frame inside the loop
    df=pd.concat(frames) if frames else pd.DataFrame()

    if len(df) > 0:
        print("\n Successfully retrieved",len(df),"records in",len(ilist)-len(fail_list),"industries")

    if len(fail_list) > 0:
        print(" Failed to retrieve",len(fail_list),"industry(ies) as follows:")
        printInLine_md(fail_list,numberPerLine=min(10,len(fail_list)),colalign='left',font_size='16px')

    return df
1613
-
1614
- #==============================================================================
1615
# Manual setup: download the 'F' category so that df is available for the
# calc_industry_sw check further down; executes only when run as a script.
if __name__=='__main__':
    start='2023-8-31'
    end='2024-9-30'
    df=get_industry_sw('F')
1619
-
1620
def calc_industry_sw(df,start,end):
    """
    Compute performance measures for every Shenwan index contained in df.

    Parameters:
        df: price data as returned by get_industry_sw, with a 'ticker'
            column and a date index.
        start, end: observation period; validated with check_period.

    Returns:
        (idf, idfall): idf holds the last row within the period for each
        index, idfall holds the full time series within the period.
        NOTE: for an invalid period a single None is returned, not a tuple.
    """
    import contextlib
    import os

    import pandas as pd

    # Check the date period
    result,start1,end1=check_period(start,end)
    if not result:
        print(" #Warning(calc_industry_sw): invalid date period",start,end)
        return None

    # Index codes present in the data
    ilist=sorted(set(df['ticker']))

    print(" Calculating industry performance, please wait ...")
    total=len(ilist)
    steps=5 if total <= 10 else 10

    latest_frames=[]
    series_frames=[]
    ignored_list=[]
    for current,i in enumerate(ilist):

        # Price history of one index
        dft = df[df['ticker']==i]
        # Nothing to process: go to the next one
        if len(dft)==0: continue

        # Non-inplace sort, so the filtered slice is not modified
        dft2=dft.sort_index(ascending=True)

        # Compute the measures with console output suppressed
        try:
            with open(os.devnull,'w') as devnull, contextlib.redirect_stdout(devnull):
                dft3=all_calculate(dft2,i,start,end)
        except Exception:
            ignored_list.append(i)
            continue
        if dft3 is None:
            ignored_list.append(i)
            continue

        dft3['start']=start

        # Keep only the observation period
        dft3a=dft3[(dft3.index >= start1) & (dft3.index <= end1)]

        latest_frames.append(dft3a.tail(1))
        series_frames.append(dft3a)

        print_progress_percent(current,total,steps=steps,leading_blanks=2)

    # One concatenation each instead of growing the frames inside the loop
    idf=pd.concat(latest_frames) if latest_frames else pd.DataFrame()
    idfall=pd.concat(series_frames) if series_frames else pd.DataFrame()

    ignored_num=len(ignored_list)
    print(" Successfully processed",len(ilist)-ignored_num,"industries,",ignored_num,"industry(ies) ignored")
    if ignored_num>0:
        print(" Ignored industry(ies):",ignored_list)

    return idf,idfall
1709
-
1710
# Manual check for calc_industry_sw, using the df downloaded in the demo
# above; executes only when run as a script.
if __name__=='__main__':
    start='2018-1-1'
    end='2022-10-31'
    idf,idfall=calc_industry_sw(df,start,end)

#==============================================================================
#==============================================================================
# Sample argument values matching the parameters of rank_industry_sw. The
# second industries assignment replaces the empty list set first.
if __name__=='__main__':
    measure='Exp Ret%'
    industries=[]
    graph=True
    axisamp=0.8
    px=False
    maxitems=32
    printout=False

    industries=['801770.SW','801720.SW','医药生物']
1727
-
1728
def rank_industry_sw(idf,measure='Exp Ret%',industries=[], \
                     graph=True,axisamp=0.8,px=False,maxitems=32, \
                     printout=False,facecolor='papayawhip',font_size='16px'):
    """
    ===========================================================================
    Rank Shenwan industries/indexes by one performance measure and show the
    result as a horizontal bar chart and/or a table.

    Parameters:
    idf: per-industry snapshot frame (one row per industry, indexed by date),
        with at least the columns 'ticker', 'start' and the chosen measure.
        It is produced by:
            idf,idfall=get_industry_data_sw(start,end,sw_level='1')
        The frame is NOT modified by this function.
    measure: ranking measure, default 'Exp Ret%'.
    industries: restrict the ranking to these industries (codes, with or
        without a '.SW'-style suffix, or names); default [] means all.
    graph: draw the bar chart, default True.
    axisamp: stretch factor handed to plot_barh, default 0.8.
    px: use plot_barh2 (plotly express) instead of plot_barh, default False.
    maxitems: maximum number of bars in one chart, default 32; above that
        the table is shown instead of the chart.
    printout: show the table of values, default False.
    facecolor: table background colour, default 'papayawhip'.
    font_size: table title font size such as '16px'; headings and data use
        one pixel less.

    Returns:
    The ranked frame indexed by industry name, sorted ascending by measure
    (all industries, not only the selected ones), or None when the measure
    is unsupported or has no data.

    Example:
    df1=rank_industry_sw(idf,measure='Exp Ret%',axisamp=0.85)
    df1risk=rank_industry_sw(idf,measure='Exp Ret Volatility%',axisamp=1.6)
    """
    import datetime
    import pandas as pd

    # Drop '.SW'-style suffixes; plain names pass through unchanged.
    # The default list is only read, never mutated.
    industries=[i.split('.')[0] for i in (industries or [])]

    # Work on a copy so the caller's frame does not gain helper columns.
    work=idf.copy()
    work['end']=pd.to_datetime(work.index).strftime('%Y-%m-%d')

    # Pick the columns needed for ranking
    try:
        gdf=work[['ticker',measure,'start','end']]
    except KeyError:
        print(" #Error(rank_industry_sw): unsupported measurement",measure)
        return None
    num1=len(gdf)

    # An explicit copy keeps the later column assignments off a slice
    gdf=gdf.dropna().copy()
    num2=len(gdf)
    if num2==0:
        print(" #Error(rank_industry_sw): no data found for",measure)
        return None

    if num2 < num1:
        print(" #Warning(rank_industry_sw):",num1-num2,"industries removed as no enough data found for",measure)

    gdf[measure]=gdf[measure].apply(lambda x: round(x,1))
    istart=gdf['start'].values[0]
    iend=pd.to_datetime(gdf.index.values[0]).strftime('%Y-%m-%d')

    gdf['name']=gdf['ticker'].apply(industry_sw_name)
    gdf=gdf.set_index('name').sort_values(by=measure,ascending=True)

    if industries:
        # One combined mask: an industry matched by both name and code is
        # kept once instead of appearing twice.
        wanted=gdf['ticker'].isin(industries).to_numpy() | gdf.index.isin(industries)
        gdf1=gdf[wanted]
    else:
        gdf1=gdf

    # Title and footnote are needed by both the table and the chart, so they
    # are always built (the table can be shown even when graph/printout are off).
    titletxt="行业板块/指数分析:最新业绩排名"
    today=datetime.date.today()
    footnote0=ectranslate(measure)+' -->\n\n'
    footnote1='申万行业/指数分类,'+iend+'快照'
    footnote2='观察期:'+istart+'至'+iend+','
    footnote3="数据来源: 申万宏源, "+str(today)+'统计'
    footnote=footnote0+footnote1+'\n'+footnote2+footnote3

    if printout or (len(gdf1) > maxitems):
        gdf2=gdf1.sort_values(by=measure,ascending=False).reset_index()
        gdf2.index=gdf2.index+1
        gdf2.columns=['行业/指数名称','行业/指数代码',ectranslate(measure),'开始日期','结束日期']

        # Table font sizes: headings and data are one pixel smaller than the title
        titile_font_size=font_size
        heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

        df_display_CSS(gdf2,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                       titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                       data_font_size=data_font_size)

    if graph:
        if len(gdf1) <= maxitems:
            colname=measure
            if not px:
                plot_barh(gdf1,colname,titletxt,footnote,axisamp=axisamp)
            else: # plotly express variant
                titletxt="行业板块/指数业绩排名:"+ectranslate(measure)
                footnote=footnote1+'。'+footnote2+footnote3
                plot_barh2(gdf1,colname,titletxt,footnote)
        else:
            print("\n #Sorry, there are too much items to be illustrated")
            print(" Solution: select some of them and use the industries=[] option")

    return gdf
1849
-
1850
- if __name__=='__main__':
1851
- measure='Exp Ret%'
1852
- axisamp=0.8
1853
-
1854
- gdf=analyze_industry_sw(idf,measure='Exp Ret%',axisamp=0.8)
1855
- gdf=analyze_industry_sw(idf,measure='Exp Ret Volatility%',axisamp=1.6)
1856
- gdf=analyze_industry_sw(idf,measure='Exp Ret LPSD%',axisamp=1.7)
1857
- gdf=analyze_industry_sw(idf,measure='Annual Ret Volatility%',axisamp=1.3)
1858
- gdf=analyze_industry_sw(idf,measure='Annual Ret%',axisamp=1.0)
1859
- gdf=analyze_industry_sw(idf,measure='Quarterly Ret%',axisamp=0.3)
1860
- gdf=analyze_industry_sw(idf,measure='Monthly Ret%',axisamp=0.6)
1861
-
1862
- #==============================================================================
1863
- if __name__=='__main__':
1864
- industry_list=['801050.SW','801080.SW']
1865
- measure='Exp Ret%'
1866
- start='2020-11-1'
1867
- end='2022-10-31'
1868
- itype='1'
1869
- period="day"
1870
- graph=True
1871
-
1872
def compare_mindustry_sw(industry_list,measure,start,end, \
                         itype='1',period="day",graph=True,printout=False,sortby='tpw_mean'):
    """
    Compare the time series of one measure across several Shenwan industries
    between start and end. This is the complete pipeline: it downloads the
    data, calculates the measures and plots the lines.

    Parameters:
    industry_list: industry codes, with or without a '.SW'-style suffix;
        too many entries make the chart crowded.
    measure: column name of the measure to compare, e.g. 'Exp Ret%'.
    start, end: date strings of the comparison period.
    itype: Shenwan group handed to get_industry_sw, default '1'.
    period: data frequency handed to get_industry_sw, default "day".
    graph: draw the line chart, default True.
    printout: show descriptive statistics, default False.
    sortby: ordering used by descriptive_statistics; one of 'tpw_mean',
        'min', 'mean', 'median' (anything else falls back to 'tpw_mean').

    Returns:
    A frame with one column per industry (named by industry name) holding the
    measure over time, or None on any validation/data failure.
    """
    import datetime
    import pandas as pd

    # Drop '.SW'-style suffixes
    industry_list=[i.split('.')[0] for i in industry_list]

    # Every code must be a known Shenwan industry
    ilist_all=set(industry_sw_list()['code'])
    for i in industry_list:
        if i not in ilist_all:
            print(" #Warning(compare_mindustry_sw): unsupported industry",i)
            return None

    # Validate the period
    result,_,_=check_period(start,end)
    if not result:
        print(" #Error(compare_mindustry_sw): invalid date period",start,end)
        return None

    # Fetch the raw data
    ddf=get_industry_sw(itype=itype,period=period,industry_list=industry_list)
    if df_have_data(ddf) != 'Found':
        print(" #Warning(compare_mindustry_sw): data tentatively unavailable for group",itype)
        print(" Data is sometimes unavialble at certain tie points, eg public holidays")
        return None

    # Calculate the measures; only the full history (second result) is used here
    _,idf=calc_industry_sw(ddf,start,end)
    if idf is None or len(idf)==0:
        print(" #Warning(compare_mindustry_sw): no data calculated for",industry_list)
        return None

    if measure not in idf.columns:
        print(" #Error(compare_mindustry_sw) unsupported measurement",measure)
        return None

    istart=idf['start'].values[0]

    # Reshape to one column per industry, as required for plotting.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column order is the same on every run (a set is not ordered).
    dfs=pd.DataFrame()
    notfoundlist=[]
    for i in dict.fromkeys(idf['ticker']):
        dft1=idf.loc[idf['ticker']==i,[measure]].dropna()
        if len(dft1)==0:
            notfoundlist.append(i)
            continue

        dft1=dft1.rename(columns={measure:industry_sw_name(i)})
        if len(dfs)==0:
            dfs=dft1
        else:
            dfs=pd.merge(dfs,dft1,how='outer',left_index=True,right_index=True)

    if notfoundlist:
        print(" #Warning(compare_mindustry_sw): industry measure not found",notfoundlist)

    # Nothing left to plot: stop here instead of indexing an empty frame
    if len(dfs)==0:
        print(" #Warning(compare_mindustry_sw): no data available for",measure)
        return None

    iend=pd.to_datetime(dfs.index.values[-1]).strftime('%Y-%m-%d')

    # Cut out the plotting window
    result,istartpd,iendpd=check_period(istart,iend)
    dfs1=dfs[(dfs.index >= istartpd) & (dfs.index <= iendpd)]

    y_label=measure
    title_txt="行业板块/指数分析:市场业绩趋势与评价"
    today=datetime.date.today()
    if graph:
        footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
        footnote2="数据来源: 申万宏源, "+str(today)+'统计'
        footnote=footnote1+footnote2

        draw_lines(dfs1,y_label,x_label=footnote, \
                   axhline_value=0,axhline_label='', \
                   title_txt=title_txt, \
                   data_label=False,resample_freq='H',smooth=True)

    if printout:
        # Relabel columns as name(code) on a renamed copy, not on the slice
        df2=dfs1.rename(columns={c:ticker_name(c)+'('+c+')' for c in list(dfs1)})

        sortby_texts={
            'tpw_mean':'按推荐标记+近期优先加权平均值降序排列',
            'min':'按推荐标记+最小值降序排列',
            'mean':'按推荐标记+平均值降序排列',
            'median':'按推荐标记+中位数值降序排列',
        }
        if sortby not in sortby_texts:
            # Previously an unknown value left the title text undefined
            print(" #Warning(compare_mindustry_sw): unsupported sortby",sortby,"- using tpw_mean")
            sortby='tpw_mean'
        sortby_txt=sortby_texts[sortby]

        title_txt=title_txt+':'+y_label+','+sortby_txt
        additional_note="注:列表仅显示有星号标记或特定数量的证券。"
        footnote='比较期间:'+start+'至'+end
        ds=descriptive_statistics(df2,title_txt,additional_note+footnote,decimals=4, \
                                  sortby=sortby,recommend_only=False)

    return dfs
1996
-
1997
- if __name__=='__main__':
1998
- mdf=compare_mindustry_sw(industry_list,measure,start,end)
1999
-
2000
- #==============================================================================
2001
- if __name__=='__main__':
2002
- industry_list=['801050.SW','801080.SW']
2003
- measure='Exp Ret%'
2004
- start='2023-1-1'
2005
- end='2023-4-11'
2006
- period="day"
2007
- graph=True
2008
- printout=False
2009
- sortby='tpw_mean'
2010
-
2011
def compare_mindustry_sw2(industry_list,measure,start,end, \
                          period="day",graph=True,printout=False,sortby='tpw_mean'):
    """
    Compare the time series of one measure across several Shenwan industries
    between start and end. Complete pipeline (download, calculate, plot) that
    does not need the Shenwan group type: several industries, one measure.

    Parameters:
    industry_list: industry codes, with or without a '.SW'-style suffix;
        too many entries make the chart crowded.
    measure: column name of the measure to compare, e.g. 'Exp Ret%'.
    start, end: date strings of the comparison period.
    period: data frequency handed to get_industry_sw2, default "day".
    graph: draw the line chart, default True.
    printout: show descriptive statistics, default False.
    sortby: ordering used by descriptive_statistics; one of 'tpw_mean',
        'min', 'mean', 'median' (anything else falls back to 'tpw_mean').

    Returns:
    A frame with one column per industry (named by industry name) holding the
    measure over time, or None on any validation/data failure.
    """
    import datetime
    import pandas as pd

    # Drop '.SW'-style suffixes
    industry_list=[i.split('.')[0] for i in industry_list]

    # A code is accepted when it is a known industry or at least all digits
    ilist_all=set(industry_sw_list()['code'])
    for i in industry_list:
        if i not in ilist_all and not i.isdigit():
            print(" #Warning(compare_mindustry_sw): unsupported industry",i)
            return None

    # Validate the period
    result,_,_=check_period(start,end)
    if not result:
        print(" #Error(compare_mindustry_sw): invalid date period",start,end)
        return None

    # Fetch the raw data
    ddf=get_industry_sw2(industry_list=industry_list,period=period)
    if df_have_data(ddf) != 'Found':
        print(" #Warning(compare_mindustry_sw): data tentatively unavailable for",industry_list)
        print(" Data is sometimes unavialble at certain tie points, eg public holidays")
        return None

    # Calculate the measures; only the full history (second result) is used here
    _,idf=calc_industry_sw(ddf,start,end)
    if idf is None or len(idf)==0:
        print(" #Warning(compare_mindustry_sw2): no data calculated for",industry_list)
        return None

    if measure not in idf.columns:
        print(" #Error(compare_mindustry_sw) unsupported measurement",measure)
        return None

    istart=idf['start'].values[0]

    # Reshape to one column per industry, as required for plotting.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column order is the same on every run (a set is not ordered).
    dfs=pd.DataFrame()
    notfoundlist=[]
    for i in dict.fromkeys(idf['ticker']):
        dft1=idf.loc[idf['ticker']==i,[measure]].dropna()
        if len(dft1)==0:
            notfoundlist.append(i)
            continue

        dft1=dft1.rename(columns={measure:industry_sw_name(i)})
        if len(dfs)==0:
            dfs=dft1
        else:
            dfs=pd.merge(dfs,dft1,how='outer',left_index=True,right_index=True)

    if notfoundlist:
        print(" #Warning(compare_mindustry_sw): industry measure not found for",notfoundlist)

    # Nothing left to plot: stop here instead of indexing an empty frame
    if len(dfs)==0:
        print(" #Warning(compare_mindustry_sw2): no data available for",measure)
        return None

    iend=pd.to_datetime(dfs.index.values[-1]).strftime('%Y-%m-%d')

    # Cut out the plotting window
    result,istartpd,iendpd=check_period(istart,iend)
    dfs1=dfs[(dfs.index >= istartpd) & (dfs.index <= iendpd)]

    y_label=measure
    title_txt="行业(板块)/指数分析:市场业绩趋势与评价"
    today=datetime.date.today()
    if graph:
        # The chart uses a shorter title; the statistics below reuse it
        title_txt="行业(板块)/指数分析:市场业绩趋势"
        footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
        footnote2="数据来源: 申万宏源, "+str(today)+'统计'
        footnote=footnote1+footnote2

        draw_lines(dfs1,y_label,x_label=footnote, \
                   axhline_value=0,axhline_label='', \
                   title_txt=title_txt, \
                   data_label=False,resample_freq='H',smooth=True)

    if printout:
        # Relabel columns as name(code) unless ticker_name gives back the
        # label unchanged; done on a renamed copy, not on the slice.
        relabel={}
        for c in list(dfs1):
            cname=ticker_name(c)
            relabel[c]=c if cname == c else cname+'('+c+')'
        df2=dfs1.rename(columns=relabel)

        sortby_texts={
            'tpw_mean':'按推荐标记+近期优先加权平均值降序排列',
            'min':'按推荐标记+最小值降序排列',
            'mean':'按推荐标记+平均值降序排列',
            'median':'按推荐标记+中位数值降序排列',
        }
        if sortby not in sortby_texts:
            # Previously an unknown value left the title text undefined
            print(" #Warning(compare_mindustry_sw2): unsupported sortby",sortby,"- using tpw_mean")
            sortby='tpw_mean'
        sortby_txt=sortby_texts[sortby]

        title_txt='*** '+title_txt+':'+y_label+','+sortby_txt
        additional_note="*** 注:列表仅显示有星号标记或特定数量的证券。"
        footnote='比较期间:'+start+'至'+end
        ds=descriptive_statistics(df2,title_txt,additional_note+footnote,decimals=4, \
                                  sortby=sortby,recommend_only=False)

    return dfs
2138
-
2139
- if __name__=='__main__':
2140
- mdf=compare_mindustry_sw2(industry_list,measure,start,end)
2141
-
2142
- #==============================================================================
2143
- if __name__=='__main__':
2144
- industry_list=['801050.SW','801080.SW']
2145
- measure='Exp Ret%'
2146
- start='2020-11-1'
2147
- end='2022-10-31'
2148
- itype='1'
2149
- period="day"
2150
- graph=True
2151
-
2152
def compare_industry_sw(idfall,industry_list,measure,graph=True):
    """
    Compare the time series of one measure across several Shenwan industries,
    using history that has already been calculated.

    Parameters:
    idfall: full per-industry history (second result of calc_industry_sw),
        with at least the columns 'ticker', 'start' and the chosen measure.
    industry_list: industry codes, with or without a '.SW'-style suffix;
        too many entries make the chart crowded.
    measure: column name of the measure to compare, e.g. 'Exp Ret%'.
    graph: draw the line chart, default True.

    Returns:
    A frame with one column per industry (named by industry name), cut to
    the observation window, or None on any validation/data failure.
    """
    import datetime
    import pandas as pd

    # Drop '.SW'-style suffixes
    industry_list=[i.split('.')[0] for i in industry_list]

    # A code is accepted when it is a known industry or at least all digits
    ilist_all=set(industry_sw_list()['code'])
    for i in industry_list:
        if i not in ilist_all and not i.isdigit():
            print(" #Warning(compare_industry_sw): unsupported or no such industry",i)
            return None

    # Reshape to one column per industry, as required for plotting
    dfs=pd.DataFrame()
    notfoundlist=[]
    istart=None
    for i in industry_list:

        try:
            dft=idfall[idfall['ticker']==i]
        except (KeyError,TypeError):
            # idfall is missing, or has no 'ticker' column
            print(" #Error(compare_industry_sw) unsupported or no such industry",i)
            return None

        if len(dft)==0:
            print(" #Error(compare_industry_sw) unsupported or no such industry",i)
            return None
        istart=dft['start'].values[0]

        try:
            dft1=pd.DataFrame(dft[measure])
        except KeyError:
            print(" #Error(compare_industry_sw) unsupported measurement",measure)
            return None
        dft1=dft1.dropna()
        if len(dft1)==0:
            notfoundlist.append(i)
            continue

        dft1=dft1.rename(columns={measure:industry_sw_name(i)})
        if len(dfs)==0:
            dfs=dft1
        else:
            dfs=pd.merge(dfs,dft1,how='outer',left_index=True,right_index=True)

    if notfoundlist:
        print(" #Warning(compare_industry_sw):",measure,"data not found for industries",notfoundlist)

    # Empty industry list or no usable data: stop before indexing an empty frame
    if istart is None or len(dfs)==0:
        print(" #Warning(compare_industry_sw): no data available for",measure)
        return None

    iend=pd.to_datetime(dfs.index.values[-1]).strftime('%Y-%m-%d')

    # Cut out the observation window
    result,istartpd,iendpd=check_period(istart,iend)
    dfs1=dfs[(dfs.index >= istartpd) & (dfs.index <= iendpd)]

    if graph:
        y_label=measure
        title_txt="行业板块/指数分析:市场业绩趋势"

        today=datetime.date.today()
        footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
        footnote2="数据来源: 申万宏源, "+str(today)+'统计'
        footnote=footnote1+footnote2

        # Label the zero line only for return-type measures
        axhline_label='收益零线' if 'Ret%' in measure else ''

        draw_lines(dfs1,y_label,x_label=footnote, \
                   axhline_value=0,axhline_label=axhline_label, \
                   title_txt=title_txt, \
                   data_label=False,resample_freq='H',smooth=True)

    return dfs1
2252
-
2253
- if __name__=='__main__':
2254
- mdf=compare_industry_sw(idfall,industry_list,measure)
2255
-
2256
- #==============================================================================
2257
- if __name__=='__main__':
2258
- start='2018-1-1'
2259
- end='2022-10-31'
2260
- df=get_industry_sw('F')
2261
- idf,idfall=calc_industry_sw(df,start,end)
2262
- base_return='Annual Ret%'
2263
- graph=True
2264
-
2265
def compare_industry_sw_sharpe(idfall,industries,base_return='Annual Ret%',graph=True):
    """
    Compare Shenwan industries by a Sharpe-style ratio over time: the chosen
    return measure divided by its matching volatility measure.

    Parameters:
    idfall: full per-industry history (second result of calc_industry_sw).
    industries: industry codes; only industries present in idfall are usable.
    base_return: return measure ending in '%', e.g. 'Annual Ret%'; the risk
        measure is derived by replacing the trailing '%' with ' Volatility%'.
    graph: draw the line chart, default True.

    Returns:
    A frame with one ratio column per industry, or None when either input
    series cannot be obtained.

    Limitation: the risk-free rate is not taken into account.
    """
    import datetime
    import pandas as pd

    if '%' not in base_return:
        print(" #Error(compare_industry_sw_sharpe): unsupported base return",base_return)
        return None

    # Return series
    aret=compare_industry_sw(idfall,industries,measure=base_return,graph=False)
    if aret is None:
        return None

    # Matching volatility series
    pos=base_return.index('%')
    base_risk=base_return[:pos]+' Volatility%'
    aretrisk=compare_industry_sw(idfall,industries,measure=base_risk,graph=False)
    if aretrisk is None:
        return None

    # Combine on common dates. Explicit suffixes are applied to the column
    # names the two frames share; an industry without volatility data has no
    # suffixed pair and is skipped instead of raising a KeyError.
    atmp=pd.merge(aret,aretrisk,how='inner',left_index=True,right_index=True, \
                  suffixes=('_ret','_risk'))
    ratios={}
    skipped=[]
    for i in list(aret):
        if (i+'_ret' in atmp.columns) and (i+'_risk' in atmp.columns):
            ratios[i]=atmp[i+'_ret']/atmp[i+'_risk']
        else:
            skipped.append(i)
    if skipped:
        print(" #Warning(compare_industry_sw_sharpe):",base_risk,"data not found for industries",skipped)

    sdf=pd.DataFrame(ratios,index=atmp.index)

    if graph and len(sdf) > 0 and len(ratios) > 0:
        y_label='夏普比率(基于'+ectranslate(base_return)+')'
        title_txt="行业板块/指数分析:市场发展趋势"

        istart=sdf.index[0].strftime('%Y-%m-%d')
        iend=sdf.index[-1].strftime('%Y-%m-%d')
        footnote1='\n申万行业/指数分类,观察期:'+istart+'至'+iend+'\n'
        today=datetime.date.today()
        footnote2="数据来源: 申万宏源, "+str(today)+'统计'
        footnote=footnote1+footnote2

        # Label the zero line only for return-type measures
        axhline_label='收益零线' if 'Ret%' in base_return else ''

        draw_lines(sdf,y_label,x_label=footnote, \
                   axhline_value=0,axhline_label=axhline_label, \
                   title_txt=title_txt, \
                   data_label=False,resample_freq='H',smooth=True)

    return sdf
2314
-
2315
- if __name__=='__main__':
2316
- industries=['801005', '801270', '801250', '801260']
2317
- sdf=compare_industry_sw_sharpe(idfall,industries,base_return='Annual Ret%')
2318
- sdf=compare_industry_sw_sharpe(idfall,industries,base_return='Quarterly Ret%')
2319
-
2320
- sdf=compare_industry_sw_sharpe(idfall,industries,base_return='Exp Ret%')
2321
-
2322
- #==============================================================================
2323
- if __name__=='__main__':
2324
- start='2018-1-1'
2325
- end='2022-10-31'
2326
- df=get_industry_sw('F')
2327
- idf,idfall=calc_industry_sw(df,start,end)
2328
- base_return='Exp Ret%'
2329
- graph=True
2330
-
2331
- df=rank_industry_sw_sharpe(idfall,base_return='Exp Ret%',axisamp=0.8)
2332
-
2333
def rank_industry_sw_sharpe(idfall,base_return='Exp Ret%',graph=True,axisamp=0.8,px=False):
    """
    Rank all industries in idfall by their latest Sharpe-style ratio and draw
    a horizontal bar chart.

    Parameters:
    idfall: full per-industry history (second result of calc_industry_sw).
    base_return: return measure the ratio is based on, default 'Exp Ret%'.
    graph: draw the bar chart, default True.
    axisamp: stretch factor handed to plot_barh, default 0.8.
    px: use plot_barh2 (plotly express) instead of plot_barh, default False.

    Returns:
    A one-column frame (industries as rows) sorted ascending by the latest
    ratio rounded to 2 decimals, or None when no ratio could be calculated.

    Limitation: the risk-free rate is not taken into account.
    """
    import datetime
    import pandas as pd

    # De-duplicate while keeping first-seen order so results are reproducible
    allindustries=list(dict.fromkeys(idfall['ticker']))
    df=compare_industry_sw_sharpe(idfall,allindustries,base_return=base_return,graph=False)
    if df is None or len(df)==0 or len(list(df))==0:
        print(" #Warning(rank_industry_sw_sharpe): no ratio available based on",base_return)
        return None

    # Latest row, transposed: industries become rows, the date is the only column
    dftail2=df.tail(1).T
    col=list(dftail2)[0]

    dftail3=dftail2.sort_values(by=col,ascending=True)
    dftail3[col]=dftail3[col].apply(lambda x: round(x,2))

    istart=idfall['start'].values[0]
    iend=pd.to_datetime(idfall.index.values[-1]).strftime('%Y-%m-%d')

    if graph:
        colname=col
        titletxt="行业板块/指数分析:最新业绩排名"
        today=datetime.date.today()
        footnote0='夏普比率(基于'+ectranslate(base_return)+') -->\n\n'
        footnote1='申万行业/指数分类,'+iend+'快照'
        footnote2='观察期:'+istart+'至'+iend+','
        footnote3="数据来源: 申万宏源, "+str(today)+'统计'
        if not px:
            footnote=footnote0+footnote1+'\n'+footnote2+footnote3
            plot_barh(dftail3,colname,titletxt,footnote,axisamp=axisamp)
        else: # plotly express variant
            titletxt="行业板块/指数业绩排名:夏普比率(基于"+ectranslate(base_return)+')'
            footnote=footnote1+'。'+footnote2+footnote3
            plot_barh2(dftail3,colname,titletxt,footnote)

    return dftail3
2373
-
2374
-
2375
- #==============================================================================
2376
- if __name__=='__main__':
2377
- industry='850831.SW'
2378
- industry='801193.SW'
2379
- industry='851811.SW'
2380
- industry='801181.SW'
2381
- industry='801841.SW'
2382
-
2383
- top=5
2384
- df=industry_stock_sw(industry)
2385
-
2386
def industry_stock_sw(industry='801270.SW',top=5,printout=False):
    """
    Get the component securities of a Shenwan industry index, ordered by
    weight from largest to smallest.

    Parameters:
    industry: index code, with or without a '.SW'-style suffix.
    top: positive -> the top heaviest components; otherwise the last
        abs(top) (lightest) components.
    printout: show the selected components as a table, default False.

    Returns:
    (clist, cdf1): the selected security codes with exchange suffixes
    ('.SS', '.SZ' or '.BJ') and the matching frame; (None, None) when the
    code is rejected or the download fails.
    """
    industry=industry.split('.')[0]

    # A code is accepted when it is a known industry or at least all digits
    ilist=list(industry_sw_list()['code'])
    if industry not in ilist and not industry.isdigit():
        print(" #Warning(industry_stock_sw): industry code not found for",industry)
        return None,None

    import akshare as ak
    try:
        cdf=ak.index_component_sw(industry)
    except Exception:
        # Best effort: any download/parsing failure is reported, not raised
        print(" #Warning(industry_stock_sw): failed to retrieve component for index",industry)
        print(" Try solution: upgrade akshare, restart jupyter and try again")
        return None,None

    # De-duplicate, keeping the most recent record of each security
    cdf=cdf.sort_values(by=['证券代码','计入日期'],ascending=[True,False])
    cdf=cdf.drop_duplicates(subset=['证券代码'],keep='first')

    # Drop rows holding the literal string 'None' (e.g. a missing name)
    cdf=cdf.mask(cdf.eq('None')).dropna()
    cdf_total=len(cdf)

    # Rank by weight
    cdf=cdf.sort_values(by='最新权重',ascending=False).reset_index(drop=True)
    cdf['序号']=cdf.index+1

    # Copy the selection so the edits below do not write into a slice of cdf
    cdf1=(cdf.head(top) if top > 0 else cdf.tail(-top)).copy()

    def _with_exchange(code):
        # First digit decides the suffix: 6 -> .SS, 0/3 -> .SZ, otherwise .BJ
        if code[:1] in ['6']:
            return code+'.SS'
        if code[:1] in ['0','3']:
            return code+'.SZ'
        return code+'.BJ'

    cdf1['最新权重']=cdf1['最新权重'].apply(lambda x: round(x,2))
    cdf1['证券代码']=cdf1['证券代码'].apply(_with_exchange)

    clist=list(cdf1['证券代码'])

    if printout:
        if '.SW' not in industry:
            industry=industry+'.SW'
        titletxt="申万指数成分证券:"+industry_sw_name(industry)+'('+industry+')'
        import datetime as dt; todaydt=str(dt.date.today())
        footnote="成分证券数量:"+str(cdf_total)+",申万宏源,"+str(todaydt)

        df_display_CSS(cdf1,titletxt=titletxt,footnote=footnote,facecolor='papayawhip',decimals=3, \
                       first_col_align='center',second_col_align='left', \
                       last_col_align='left',other_col_align='right', \
                       titile_font_size='16px',heading_font_size='15px', \
                       data_font_size='15px')

    return clist,cdf1
2455
-
2456
- if __name__=='__main__':
2457
- clist,cdf=industry_stock_sw(industry='801005',top=10)
2458
- clist,cdf=industry_stock_sw(industry='850831',top=-10)
2459
- #==============================================================================
2460
-
2461
def get_industry_data_sw(start,end,sw_level='1'):
    """
    Get Shenwan industry history; a thin wrapper around get_industry_info_sw.

    Parameters:
    start: start date.
    end: end date.
    sw_level: one of '1', '2', '3', 'F', 'S', 'B', 'C'. It is compared as an
        upper-cased string, so 1 or 'f' are accepted as well.

    Returns:
    (idf, idfall) for further analysis, or (None, None) when sw_level is not
    supported or no data could be obtained.
    """
    sw_level_list=['1','2','3','F','S','B','C']

    # sw_level maps one-to-one onto the itype of get_industry_info_sw
    itype=str(sw_level).strip().upper()
    if itype not in sw_level_list:
        print(" #Error(get_industry_data_sw): unsupported sw_level",sw_level)
        print(" Supported sw_level:",sw_level_list)
        return None,None

    result=get_industry_info_sw(start=start,end=end,itype=itype)
    if result is None:
        # Keep the two-value shape even if the callee reports failure as None
        return None,None

    idf,idfall=result
    return idf,idfall
2478
-
2479
-
2480
- if __name__ =="__main__":
2481
-
2482
- # 新冠疫情三年
2483
- start='2023-1-1'; end='2023-4-10'
2484
- itype='F'
2485
-
2486
- idf,idfall=get_industry_info_sw(start,end,itype='1')
2487
-
2488
def get_industry_info_sw(start,end,itype='1'):
    """
    Get Shenwan industry history for one industry group.

    Parameters:
    start: start date.
    end: end date.
    itype: industry group handed to get_industry_sw, default '1'.

    Returns:
    (idf, idfall) for further analysis. On failure (invalid period or no
    data) the result is (None, None), so callers can always unpack two values.
    """

    # Validate the period
    result,startpd,endpd=check_period(start,end)
    if not result:
        print(" #Error(get_industry_info_sw): invalid date period from",start,'to',end)
        return None,None

    print("This may need great great time depending on network/computer speed, take a break ...")
    print("\n*** Step 1:")
    # Download the industry history; this step is slow
    df=get_industry_sw(itype=itype)
    found=df_have_data(df)
    if found != 'Found':
        print(" #Warning(get_industry_info_sw): data tentatively unavailable for group",itype)
        print(" Data is sometimes unavialble at certain time points, try again later")
        # Two values, matching every other exit of this function
        return None,None

    print("\n*** Step 2:")
    # Calculate the base measures; this step is slow as well
    idf,idfall=calc_industry_sw(df,start,end)

    return idf,idfall
2518
-
2519
- #==============================================================================
2520
- if __name__ =="__main__":
2521
-
2522
- # 新冠疫情三年
2523
- industry_list=['850831','801785','801737','801194','801784','801783','801782']
2524
- start='2023-1-1'; end='2023-4-3'
2525
-
2526
def get_industry_info_sw2(industry_list,start,end):
    """
    Get Shenwan industry history for an explicit list of industries; the
    list may mix indexes of different kinds.

    Parameters:
    industry_list: industry codes handed to get_industry_sw2.
    start: start date.
    end: end date.

    Returns:
    (idf, idfall) for further analysis. On failure (invalid period or no
    data) the result is (None, None), so callers can always unpack two values.
    """

    # Validate the period
    result,startpd,endpd=check_period(start,end)
    if not result:
        print(" #Error(get_industry_info_sw2): invalid date period from",start,'to',end)
        return None,None

    print("This may need great time depending on network/computer speed, take a break ...")
    print("\n*** Step 1:")
    # Download the industry history; this step is slow
    df=get_industry_sw2(industry_list)
    found=df_have_data(df)
    if found != 'Found':
        print(" #Warning(get_industry_info_sw2): data tentatively unavailable for",industry_list)
        print(" Data is sometimes unavialble at certain time points, try again later")
        # Two values, matching every other exit of this function
        return None,None

    print("\n*** Step 2:")
    # Calculate the base measures; this step is slow as well
    idf,idfall=calc_industry_sw(df,start,end)

    return idf,idfall
2557
-
2558
- #==============================================================================
2559
- if __name__ =="__main__":
2560
- start='2022-1-1'
2561
- end='2022-12-20'
2562
- tickers=['600600.SS','600132.SS','000729.SZ','002461.SZ','600573.SS']
2563
- measures=['Exp Ret%']
2564
- market_index='000001.SS'
2565
- window=252
2566
- colalign='right'
2567
-
2568
- rs=rank_msecurity_performance(tickers,start,end,measures=['Exp Ret%'])
2569
-
2570
def rank_msecurity_performance(tickers,start,end, \
                               measures=['Exp Ret%'], \
                               market_index='000001.SS',window=252,colalign='right', \
                               facecolor='papayawhip',font_size='16px'):
    """
    Tabulate several securities against several measures, sorted from high
    to low by Sharpe ratio.

    Parameters:
    tickers: list of security codes.
    start, end: date strings of the observation period.
    measures: measures obtained through compare_msecurity; the risk-adjusted
        measures treynor/sharpe/sortino/alpha are always appended.
    market_index: market index handed to compare_mrar, default '000001.SS'.
    window: rolling window handed to compare_mrar, default 252.
    colalign: kept for backward compatibility; not used by the CSS table.
    facecolor: table background colour, default 'papayawhip'.
    font_size: table title font size such as '16px'; headings and data use
        one pixel less.

    Returns:
    A frame with one row per security and one column per measure (latest
    values, 4 decimals), or None when no measure could be obtained.
    """
    print("Searching for multiple security information, please wait ......")
    import datetime
    import os, sys
    import pandas as pd

    # Context manager that silences print output of the wrapped calls
    class HiddenPrints:
        def __enter__(self):
            self._original_stdout = sys.stdout
            sys.stdout = open(os.devnull, 'w')

        def __exit__(self, exc_type, exc_val, exc_tb):
            sys.stdout.close()
            sys.stdout = self._original_stdout

    rar_list=['treynor','sharpe','sortino','alpha']
    rar_list_c=['特雷诺比率','夏普比率','索替诺比率','阿尔法值']

    df=pd.DataFrame()
    allmeasures=list(measures)+rar_list
    for m in allmeasures:
        # Progress indicator
        print_progress_percent2(m,allmeasures,steps=len(allmeasures),leading_blanks=4)

        if m not in rar_list:
            with HiddenPrints():
                dft=compare_msecurity(tickers,measure=m,start=start,end=end,graph=False)
            if dft is None:
                continue

            # Rename columns to name(code) so they match the compare_mrar output
            dft.columns=[ticker_name(t)+'('+t+')' for t in tickers]
            dft['指标']=ectranslate(m)
        else:
            with HiddenPrints():
                dft=compare_mrar(tickers,rar_name=m,start=start,end=end, \
                                 market_index=market_index,window=window,graph=False)
            if dft is None:
                continue

            dft['指标']=rar_list_c[rar_list.index(m)]

            # Weight helper columns are not part of the ranking table
            dft=dft.drop(columns=['time_weight','relative_weight'],errors='ignore')

        # Latest snapshot only; copy so the rounding does not write into a slice
        dft1=dft.tail(1).copy()
        for c in list(dft1):
            if c == '指标':
                continue
            dft1[c]=dft1[c].apply(lambda x: round(float(x),4))

        if len(df) == 0:
            df=dft1
        else:
            df=pd.concat([df,dft1])

    if len(df) == 0:
        print(" #Warning(rank_msecurity_performance): no data available for",tickers)
        return None

    # Measures as columns, securities as rows
    df1=df.set_index('指标').T

    # Sort by Sharpe ratio when it is available
    if '夏普比率' in list(df1):
        df1=df1.sort_values('夏普比率',ascending=False)

    df1.reset_index(inplace=True)
    df1.rename(columns={'index':'股票'},inplace=True)

    titletxt="股票多重指标分析:按夏普比率降序排列"
    footnote1="观察期:"+start+'至'+end+',表中数据为'+end+'快照'
    footnote2="表中的夏普比率/索替诺比率/阿尔法值均为TTM滚动值"
    todaydt=datetime.date.today()
    footnote3="数据来源:新浪财经/东方财富,"+str(todaydt)+'统计'
    # Each footnote part goes on its own line (the second separator used to be a literal 'n')
    footnote=footnote1+'\n'+footnote2+'\n'+footnote3

    # Table font sizes: headings and data are one pixel smaller than the title
    titile_font_size=font_size
    heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

    # Display the ranking table built above (an undefined name was passed here before)
    df_display_CSS(df1,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                   titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                   data_font_size=data_font_size)

    return df1
2675
- #==============================================================================
2676
- #==============================================================================
2677
# Developer scratch example: executed only when this module is run as a script.
if __name__=='__main__':
    # Ten industry index codes used as sample input
    tickers=['801160','801120','801170','801710','801890','801040','801130','801180','801720','801970']
    start='2022-1-1'
    end='2023-3-22'
    info_type='Close'

    # Fetch the industry data set for group '1', then plot the correlations
    df=get_industry_sw('1')
    df=industry_correlation_sw(df,tickers,start,end,info_type='Close')
2685
-
2686
def cm2inch(x,y):
    """Convert a (width, height) pair from centimetres to inches.

    Returns the 2-tuple ``(x/2.54, y/2.54)``.
    """
    cm_per_inch=2.54
    width_in=x/cm_per_inch
    height_in=y/cm_per_inch
    return width_in,height_in
2688
-
2689
def industry_correlation_sw(df,tickers,start,end, \
                            info_type='Close',corr_size=6,star_size=5):
    """
    Plot a heatmap of the pairwise Pearson correlations between industry
    (or index) series and mark statistically significant cells with stars.

    Parameters:
    df: frame holding all candidate industries. It must provide a 'ticker'
        column and one column per supported info_type; its index is compared
        with the dates returned by check_period (presumably a DatetimeIndex
        -- confirm against get_industry_sw).
    tickers: list with at least two industry codes.
    start, end: observation period, validated through check_period.
    info_type: 'Close' (default), 'Open', 'High', 'Low' or 'Volume'.
    corr_size: font size for coefficients, tick labels and the footnote.
    star_size: font size for the significance stars.

    Returns:
    The correlation matrix (DataFrame), or None when validation fails or
    fewer than two industries/observations are available.
    """
    # Check the number of industries
    if not isinstance(tickers,list) or len(tickers) < 2:
        print(" #Error(industry_correlation_sw): number of industries too few",tickers)
        return None

    # Check the information type
    info_types=['Close','Open','High','Low','Volume']
    info_types_cn=['收盘价','开盘价','最高价','最低价','成交量']
    if info_type not in info_types:
        print(" #Error(industry_correlation_sw): invalid information type",info_type)
        print(" Supported information type:",info_types)
        return None
    info_type_cn=info_types_cn[info_types.index(info_type)]

    # Check the period
    result,startdt,enddt=check_period(start,end)
    if not result:
        print(" #Error(industry_correlation_sw): invalid period",start,end)
        return None

    # Build one column per industry, aligned on common dates
    print(" Consolidating industry performance, please wait ...")
    import pandas as pd

    dfs=None
    for ind in tickers:
        dft=df[df['ticker']==ind]
        # A boolean-mask selection is never None: an unknown code shows up
        # as an empty frame, which must be skipped or the inner merge below
        # would wipe out every row.
        if dft is None or len(dft) == 0:
            print(" #Warning(industry_correlation_sw): unknown industry code",ind)
            continue

        dft2=dft[(dft.index >= startdt) & (dft.index <= enddt)]
        dft3=pd.DataFrame(dft2[info_type])
        dft3.rename(columns={info_type:industry_sw_name(ind)},inplace=True)

        if dfs is None:
            dfs=dft3
        else:
            dfs=pd.merge(dfs,dft3,how='inner',left_index=True,right_index=True)

    if dfs is None or len(dfs.columns) < 2:
        print(" #Error(industry_correlation_sw): fewer than two industries with data",tickers)
        return None

    dfs.dropna(axis=0,inplace=True)
    # pearsonr needs at least two observations per series
    if len(dfs) < 2:
        print(" #Error(industry_correlation_sw): too few common observations in period",start,end)
        return None

    df_coor = dfs.corr()

    print(" Preparing cross-industry correlations, please wait ...")
    import seaborn as sns
    sns.set(font='SimHei') # lets Seaborn render Chinese labels

    plt.figure(figsize=(12.8,6.4))
    ax1 = plt.gca()

    # Build the masks so that each pair is displayed only once.
    # mask2 keeps the upper triangle (diagonal included) and is used below to
    # decide where stars are drawn; mask is the transformed version handed to
    # the heatmap.
    import numpy as np
    mask = np.zeros_like(df_coor)
    mask[np.triu_indices_from(mask)] = True
    mask2 = mask
    mask = (np.flipud(mask)-1)*(-1)
    mask = np.rot90(mask,k = -1)

    sns.heatmap(df_coor,annot=True,cmap="YlGnBu",
                mask=mask,
                vmax=1,vmin=-1,
                fmt='.2f',ax=ax1,annot_kws={"size":corr_size})

    ax1.tick_params(axis = 'both', length=0)

    # Compute coefficient and p-value for every ordered pair of columns
    from scipy.stats import pearsonr
    colnames=list(dfs.columns.values)
    ncols=len(colnames)
    rlist = []
    plist = []
    for i in colnames:
        for j in colnames:
            r,p = pearsonr(dfs[i],dfs[j])
            rlist.append(r)
            plist.append(p)

    rarr = np.asarray(rlist).reshape(ncols,ncols)
    parr = np.asarray(plist).reshape(ncols,ncols)

    # Offsets of the star text relative to the tick position
    widthx = 0
    widthy = -0.15

    # Size of the stars
    font_dict={'size':star_size}

    for m in ax1.get_xticks():
        for n in ax1.get_yticks():
            row,col=int(m),int(n)
            if not mask2[row,col] < 1.:
                continue

            pv = parr[row,col]
            if pv < 0.001:
                stars='***'
            elif pv < 0.01:
                stars='**'
            elif pv < 0.05:
                stars='*'
            else:
                # not significant (or p-value is NaN): no star
                continue

            # White stars where the coefficient exceeds 0.3, black elsewhere
            color='white' if rarr[row,col] > 0.3 else 'k'
            ax1.text(n+widthx,m+widthy,stars,ha = 'center',color = color,fontdict=font_dict)

    plt.title("行业板块/指数"+info_type_cn+"之间的相关性")
    plt.tick_params(labelsize=corr_size)

    footnote1="\n显著性数值:***非常显著(<0.001),**很显著(<0.01),*显著(<0.05),其余为不显著"
    footnote2="\n系数绝对值:>=0.8极强相关,0.6-0.8强相关,0.4-0.6相关,0.2-0.4弱相关,否则为极弱(不)相关"

    footnote3="\n观察期间: "+start+'至'+end
    import datetime as dt; stoday=dt.date.today()
    footnote4=";来源:Sina/EM,"+str(stoday)+";基于申万行业/指数分类"

    fontxlabel={'size':corr_size}
    plt.xlabel(footnote1+footnote2+footnote3+footnote4,fontxlabel)

    plt.gca().set_facecolor('whitesmoke')
    plt.show()

    return df_coor
2833
-
2834
- #==============================================================================
2835
- #==============================================================================
2836
# Developer scratch example: executed only when this module is run as a script.
if __name__=='__main__':
    # The sample mixes industry names with a numeric industry code
    industries=['煤炭','医药生物','801750']
    top=10
    printout=True
2840
-
2841
def mixed_industry_stocks(industries=['煤炭','医药生物'],top=10,printout=True, \
                          facecolor='papayawhip',font_size='16px'):
    """
    Combine the first `top` component stocks of several Shenwan industry
    indexes into one equally weighted stock list.

    Parameters:
    industries: list of industry names and/or all-digit industry codes; a
        single string is accepted as a one-element list. The default list is
        only read, never modified.
    top: number of component stocks requested from each industry
        (passed through to industry_stock_sw).
    printout: when True, display the combined table via df_display_CSS.
    facecolor: background colour of the displayed table.
    font_size: title font size such as '16px'; headings and data use 1px less.

    Returns:
    List of unique stock codes in order of first appearance; an empty list
    when no component stock could be retrieved.
    """
    import pandas as pd

    # A bare string would otherwise be iterated character by character
    if isinstance(industries,str):
        industries=[industries]

    # Translate industry names into industry codes
    industries1=[]
    for i in industries:
        icode=i if i.isdigit() else industry_sw_code(i)
        if icode is None:
            print(" #Warning(mixed_industry_stocks): Shenwan industry not found for",i)
            continue
        industries1.append(icode)

    # Fetch the component stocks of every industry
    frames=[]
    for i in industries1:
        _,dft=industry_stock_sw(industry=i,top=top,printout=False)
        if dft is None or len(dft) == 0:
            print(" #Warning(mixed_industry_stocks): no component stocks retrieved for",i)
            continue

        dft['行业代码']=i
        dft['行业名称']=industry_sw_name(i)
        frames.append(dft)

    # Nothing retrieved: avoid the KeyError/ZeroDivisionError further down
    if not frames:
        print(" #Warning(mixed_industry_stocks): no component stocks found for",industries)
        return []

    df=pd.concat(frames)

    # Drop repeated stocks (one stock may belong to more than one index)
    df.drop_duplicates(subset=['证券代码'], keep='first', inplace=True)
    df['初始权重']=round(1.0 / len(df),4)
    df.reset_index(drop=True,inplace=True)
    df['序号']=df.index+1

    df_print=df[['序号','证券名称','证券代码','初始权重','行业名称','行业代码']]

    if printout:
        if len(industries) > 1:
            titletxt="多行业投资组合的成分股:初始等权重"
        else:
            titletxt="单一行业投资组合的成分股:初始等权重"

        import datetime; todaydt=datetime.date.today()
        footnote="数据来源:申万宏源,统计日期:"+str(todaydt)

        # Table font sizes: headings and data are 1px smaller than the title
        titile_font_size=font_size
        heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

        df_display_CSS(df_print,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                       titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                       data_font_size=data_font_size)

    # Code -> weight mapping; only the codes (its keys) are returned
    stock_dict=df.set_index(['证券代码'])['初始权重'].to_dict()

    return list(stock_dict)
2904
- #==============================================================================
2905
# Developer scratch example: executed only when this module is run as a script.
if __name__=='__main__':
    industry='房地产开发'
    # The second assignment overrides the first; only this value takes effect
    industry='证券Ⅱ'
    top=5
    sw_level='2'
2910
-
2911
-
2912
def find_peers_china(industry='',top=20,rank=20,sw_level='2'):
    """
    ===========================================================================
    Purpose: list the ranked component stocks of one Shenwan industry.

    Main parameters:
    industry: Shenwan industry name or code (an optional suffix such as
        '.SW' is stripped). With the default '', the industries of the
        category selected by sw_level are printed instead.
    sw_level: Shenwan category used when industry is '': '1', '2', '3',
        'F', 'S', 'B' or 'C'.
    top: number of stocks kept from the ranked list (applied as the slice
        swlist[:top]).
    rank: number of stocks requested from industry_stock_sw; lowered to
        `top` whenever top < rank.

    Returns:
    A list of tickers on success; [] when the component stocks cannot be
    retrieved; None when only the industry list was printed or when the
    input is invalid / the industry is unknown.

    Example:
    stocks2=find_peers_china('通信工程及服务',top=10)
    """

    # Avoid requesting more stocks than will be returned
    if top < rank:
        rank=top

    # Default case: no industry given, print the industries of the category
    if industry == '':
        sw_level_list=['1','2','3','F','S','B','C']
        if sw_level not in sw_level_list:
            # Report the problem rather than let list.index raise ValueError
            print(" #Warning(find_peers_china): invalid Shenwan industry type for",sw_level)
            print(" Valid Shenwan industry types:",sw_level_list)
            return None

        print_industry_sw(itype=sw_level,numberPerLine=4,colalign='left')
        return None

    if not isinstance(industry,str):
        print(" #Error(find_peers_china): expecting an industry code or name for",industry)
        return None

    # Strip an optional suffix such as '.SW'
    industry=industry.split('.')[0]
    if industry.isdigit():
        # Shenwan industry code
        iname=industry_sw_name(industry)
        if iname is None:
            print(" #Warning(find_peers_china): Shenwan industry code not found for",industry)
            return None

        swlist,_=industry_stock_sw(industry=industry,top=rank,printout=True)
    else:
        # Shenwan industry name
        icode=industry_sw_code(industry)
        if icode is None:
            # Unknown name: suggest every industry whose name contains it
            industry_df=industry_sw_list()
            possible_industry_list=[name+'('+code+'.SW'+')' \
                                    for name,code in zip(industry_df['name'],industry_df['code']) \
                                    if industry in name]

            print(" #Warning(find_peers_china): Shenwan industry name not found for",industry)
            if len(possible_industry_list) > 0:
                print(" Do you mean the following Shenwan industry names?")
                printlist(possible_industry_list,numperline=5,beforehand=' ',separator=' ')
            else:
                print(" Sorry, no similiar Shenwan industry name containing ",industry)

            return None

        swlist,_=industry_stock_sw(industry=icode,top=rank,printout=True)

    if swlist is not None:
        return swlist[:top]

    print(" #Warning(find_peers_china): failed in retrieving component stocks for Shenwan industry",industry)
    print(" Possible solution: upgrade akshare. if still fail, report to the author of siat for help")
    return []
2986
-
2987
- #==============================================================================
2988
- # 申万行业指数历史行情
2989
- #==============================================================================
2990
# Developer scratch example: executed only when this module is run as a script.
if __name__ =="__main__":
    ticker='859821'
    # The second assignment overrides the first; the suffixed form is used
    ticker='859821.SW'

    start='2023-1-1'
    end='2023-2-1'

    df=get_sw_index(ticker,start,end)
2998
-
2999
def get_sw_index(ticker,start,end):
    """
    Fetch the daily history of a single Shenwan industry index.

    Parameters:
    ticker: index code in the form 'CODE.SW' (case-insensitive). Shenwan
        codes start with 8 and are easily confused with Beijing exchange
        stock codes, so the '.SW' suffix is required here: a ticker without
        a suffix, or with any other suffix, makes the function return None.
    start, end: period to keep, validated through check_period.

    Returns:
    DataFrame indexed by date (ascending) with columns ticker, Close,
    Adj Close, Open, High, Low, Volume, Amount and name; None when the
    ticker is not a '.SW' code, the period is invalid, or retrieval fails.
    """

    # Accept only CODE.SW
    ticker_split=ticker.upper().split('.')
    if len(ticker_split) != 2 or ticker_split[1] != 'SW':
        return None
    symbol=ticker_split[0]

    # Check the period
    result,startts,endts=check_period(start,end)
    if not result:
        print(" #Error(get_sw_index): invalid date(s) in or period between",start,'and',end)
        return None

    import akshare as ak
    import pandas as pd

    try:
        dft = ak.index_hist_sw(symbol=symbol,period="day")
    except Exception:
        # Fall back to the fund-index endpoint, which only supplies a closing
        # value: reuse it for open/high/low and zero the volume/amount.
        try:
            dft = ak.index_hist_fund_sw(symbol=symbol,period="day")
            dft['代码']=symbol
            dft['收盘']=dft['收盘指数']
            dft['开盘']=dft['收盘指数']
            dft['最高']=dft['收盘指数']
            dft['最低']=dft['收盘指数']
            dft['成交量']=0; dft['成交额']=0
        except Exception:
            print(" #Error(get_sw_index): failed to retrieve index",symbol)
            return None

    dft['ticker']=dft['代码'].apply(lambda x: x+'.SW')
    dft['name']=dft['代码'].apply(industry_sw_name)

    dft['date']=pd.to_datetime(dft['日期'])
    dft.set_index('date',inplace=True)
    dft['Close']=dft['收盘']
    dft['Adj Close']=dft['Close']
    dft['Open']=dft['开盘']
    dft['High']=dft['最高']
    dft['Low']=dft['最低']

    yi=100000000 # one hundred million
    dft['Volume']=dft['成交量']*yi # raw data is in units of 100 million shares
    dft['Amount']=dft['成交额']*yi # raw data is in units of 100 million yuan

    colList=['ticker','Close','Adj Close','Open','High','Low','Volume','Amount','name']
    in_period=(dft.index >= startts) & (dft.index <= endts)
    # sort_index() returns a new frame, so nothing is modified through a
    # slice (the original in-place sort triggered SettingWithCopyWarning)
    df=dft.loc[in_period,colList].sort_index()

    return df
3071
-
3072
- #==============================================================================
3073
# Developer scratch example: executed only when this module is run as a script.
if __name__ =="__main__":
    # Sample input: a mixed-case '.Sw' suffix and a non-Shenwan ticker
    tickers=['859821.SW','859822.Sw','600519.SS']

    start='2023-1-1'
    end='2023-2-1'

    df=get_sw_indexes(tickers,start,end)
3080
-
3081
def get_sw_indexes(tickers,start,end):
    """
    Fetch the daily history of several Shenwan industry indexes.

    Parameters:
    tickers: one ticker or a list of tickers. Only entries of the form
        'CODE.SW' (case-insensitive) are used; all others are ignored.
    start, end: period to keep, validated through check_period.

    Returns:
    - None when the period is invalid, no '.SW' ticker is given, or no
      index could be retrieved;
    - the plain frame returned by get_sw_index when exactly one '.SW'
      ticker is given;
    - otherwise the frames of all retrievable indexes, inner-joined on date,
      with columns relabelled through create_tuple_for_columns into a
      MultiIndex.
    """

    # Check the period
    result,startts,endts=check_period(start,end)
    if not result:
        print(" #Error(get_sw_indexes): invalid date(s) in or period between",start,'and',end)
        return None

    # Empty list: nothing to do
    if isinstance(tickers,list) and len(tickers) == 0:
        return None

    # A single ticker given as a string
    if isinstance(tickers,str):
        tickers=[tickers]

    # Keep only Shenwan index codes
    tickers_sw=[]
    for t in tickers:
        t=t.upper()
        t_split=t.split('.')
        if len(t_split)==2 and t_split[1]=='SW':
            tickers_sw.append(t)

    # No Shenwan code left (previously an IndexError)
    if len(tickers_sw) == 0:
        return None

    # Only one code: return its plain frame
    if len(tickers_sw) == 1:
        return get_sw_index(tickers_sw[0],startts,endts)

    import pandas as pd
    # Every ticker is tried, including the last one; indexes that cannot be
    # retrieved are skipped rather than merged as None.
    df=None
    for t in tickers_sw:
        dft=get_sw_index(t,startts,endts)
        if dft is None:
            continue

        columns=create_tuple_for_columns(dft,t)
        dft.columns=pd.MultiIndex.from_tuples(columns)

        if df is None:
            df=dft
        else:
            df=pd.merge(df,dft,how='inner',left_index=True,right_index=True)

    return df
3145
-
3146
- #==============================================================================
3147
# Developer scratch example: executed only when this module is run as a script.
if __name__ =="__main__":
    sw_level='F'
    # The second assignment overrides the first
    sw_level='2'
    indicator='Exp Ret%'
    start='MRY'
    end='default'
    printout='smart'
3154
-
3155
-
3156
def industry_scan_china(sw_level='F', \
                        indicator='Exp Adj Ret%', \
                        base_return='Exp Adj Ret%', \
                        start='MRY',end='default', \
                        RF=0, \
                        printout='smart', \
                        facecolor='papayawhip',font_size='16px'):
    """
    ===========================================================================
    Purpose: scan the Shenwan industry indexes of one category and rank them
    by a performance indicator. Needs a fast connection and may take long.

    Main parameters:
    sw_level: Shenwan category, default 'F'.
        F--market representation, S--investment style (strategy),
        B--broad style, C--Jinchuang, 1/2/3--level 1/2/3 industries,
        J1/J2/J3--fund basic level 1/2/3 indexes, JF--fund featured indexes.
    indicator: ranking indicator, default 'Exp Adj Ret%'. Any supported
        return/volatility/LPSD column, or 'sharpe' / 'sortino'.
    start, end: evaluation period; shortcuts such as MRM/MRQ/MRY (default)/
        YTD/L3Y/L5Y are resolved by start_end_preprocess.
    base_return: return type used for the sharpe and sortino ratios, default
        'Exp Adj Ret%'. When indicator is not sharpe/sortino, base_return is
        derived from indicator and the given value is overridden.
    RF: annualized risk-free rate as a decimal, default 0.
    printout: filter for the displayed table.
        smart--top 10 and bottom 10 (default), winner--indicator > 0 only,
        loser--indicator <= 0 only, 50--top 50, -10--bottom 10, all--all.
        Numbers may be given as int or str.
    facecolor: table background colour, default 'papayawhip'.
    font_size: title font size, default '16px'; headings/data use 1px less.

    Returns:
    The full ranked frame (before the printout filter) with all indicator
    columns rounded to 2 decimals, or None on invalid input / missing data.

    Example:
    info=industry_scan_china(sw_level='3',indicator='sharpe',start='MRY')
    """

    # With default dates, shorten the window for weekly/monthly indicators
    # to reduce the amount of data and time needed
    if start=='MRY' and end=='default':
        if 'Weekly' in indicator or 'Weekly' in base_return:
            start='MRM'
        if 'Monthly' in indicator or 'Monthly' in base_return:
            start='MRQ'

    # Check the Shenwan category
    sw_level_list=['1','2','3','F','S','B','C','J1','J2','J3','JF']
    if sw_level not in sw_level_list:
        print(" #Warning(industry_scan_china): invalid Shenwan industry types for",sw_level)
        print(" Valid Shenwan industry types:",end='')
        print_list(sw_level_list)
        return None

    # Supported columns: for each period, plain and adjusted return, each
    # with its volatility and LPSD, e.g. 'Exp Ret%', 'Exp Ret Volatility%',
    # 'Exp Ret LPSD%', 'Exp Adj Ret%', ...
    base_return_list=[period+ret+suffix \
                      for period in ('Exp','Annual','Quarterly','Monthly','Weekly') \
                      for ret in (' Ret',' Adj Ret') \
                      for suffix in ('%',' Volatility%',' LPSD%')]
    if base_return not in base_return_list:
        print(" #Warning(industry_scan_china): unsupported base return type for",base_return)
        print(" Supported base return:")
        printlist(base_return_list,numperline=5,beforehand=' ',separator=', ')
        return None

    indicator_list=base_return_list + ['sharpe','sortino']

    if indicator.lower() in ['sharpe','sortino']:
        indicator=indicator.lower()

    if indicator not in indicator_list:
        print(" #Warning(industry_scan_china): unsupported indicator for",indicator)
        print(" Supported indicators:")
        printlist(indicator_list,numperline=5,beforehand=' ',separator=', ')
        return None

    # Resolve the evaluation period
    fromdate,todate=start_end_preprocess(start,end)
    import datetime as dt; todaydt=dt.date.today().strftime('%Y-%m-%d')

    # The internal category id is the validated sw_level itself
    itype=sw_level

    # Step 1: retrieve the industry history (slow)
    print(" *** Step 1: ")
    df=get_industry_sw(itype=itype)
    found=df_have_data(df)
    if not found=='Found':
        print(" #Warning(industry_scan_china): data tentatively unavailable for group",itype)
        print(" Data is sometimes unavialble at certain time points, try again later")
        return None

    # Step 2: compute the base performance columns (slow)
    print("\n *** Step 2: ")
    idf,_=calc_industry_sw(df,fromdate,todate)

    # Unless ranking by sharpe/sortino, the base return follows the indicator:
    # a volatility or LPSD indicator maps back to its return column.
    if indicator not in ['sharpe','sortino']:
        base_return=indicator.replace('Ret Volatility%','Ret%').replace('Ret LPSD%','Ret%')

    # Every supported return type is a percentage, while RF is a decimal
    RFS=RF*100
    base_return_volatility=base_return.replace('Ret%','Ret Volatility%')
    base_return_lpsd=base_return.replace('Ret%','Ret LPSD%')

    # Risk-free return over the horizon of the base return
    if 'Exp' in base_return:
        RF_days=RFS/365 * calculate_days(fromdate, todate)
    elif 'Annual' in base_return:
        RF_days=RFS
    elif 'Quarterly' in base_return:
        RF_days=RFS/4
    elif 'Monthly' in base_return:
        RF_days=RFS/12
    else: # 'Weekly'
        RF_days=RFS/52

    idf['sharpe']=(idf[base_return]-RF_days) / idf[base_return_volatility]
    idf['sortino']=(idf[base_return]-RF_days) / idf[base_return_lpsd]

    # Rank, best first, numbering from 1
    idf.sort_values(indicator,ascending=False,inplace=True)
    idf.reset_index(inplace=True)
    idf.index=idf.index+1

    idf['Industry Name']=idf['ticker'].apply(lambda x: industry_sw_name(x))
    idf['Industry Code']=idf['ticker'].apply(lambda x: x+'.SW')

    # Ranking indicator first, then all remaining indicators. A new list is
    # built so that indicator_list stays complete and the ranking column is
    # rounded as well.
    other_indicators=[x for x in indicator_list if x != indicator]
    collist=['Industry Code','Industry Name',indicator]+other_indicators
    df2=idf[collist].copy()

    # Round every indicator to 2 decimals
    for col in indicator_list:
        df2[col]=df2[col].apply(lambda x: round(x,2))

    # Filter the rows to display; 'smart' is the fallback
    import pandas as pd
    if len(df2) > 20:
        df_prt=pd.concat([df2.head(10),df2.tail(10)])
    else:
        df_prt=df2

    if printout=='all':
        df_prt=df2
    elif printout=='winner':
        df_prt=df2[df2[indicator] > 0]
    elif printout=='loser':
        df_prt=df2[df2[indicator] <= 0]
    else:
        try:
            printoutd=int(printout)
        except (ValueError,TypeError):
            pass # not a number: keep the smart selection
        else:
            if printoutd > 0:
                df_prt=df2.head(printoutd)
            else:
                df_prt=df2.tail(-printoutd)

    # Chinese column titles for the displayed table; rename() returns a new
    # frame, so df2 (the return value) keeps its English titles
    ret_cn=ectranslate(base_return)
    vol_cn=ectranslate(base_return_volatility)
    lpsd_cn=ectranslate(base_return_lpsd)
    df_prt=df_prt.rename(columns={'Industry Code':'代码','Industry Name':'名称', \
                                  base_return:ret_cn, \
                                  base_return_volatility:vol_cn, \
                                  base_return_lpsd:lpsd_cn, \
                                  'sharpe':'夏普比率','sortino':'索替诺比率'})

    # Category description for the title
    sw_level_names={'F':'申万市场表征指数','S':'申万投资风格指数', \
                    'B':'申万大类风格指数','C':'申万金创指数', \
                    '1':'申万一级行业','2':'申万二级行业','3':'申万三级行业', \
                    'J1':'申万基金基础一级指数','J2':'申万基金基础二级指数', \
                    'J3':'申万基金基础三级指数','JF':'申万基金特色指数'}
    sw_level_txt=sw_level_names.get(sw_level,'未知类别')

    # Filter description for the title
    if printout=='all':
        printout_txt='所有指数'
    elif printout=='smart':
        printout_txt='前/后十个行业' if len(df2) > 20 else '所有指数'
    elif printout=='winner':
        printout_txt='收益为正者'
    elif printout=='loser':
        printout_txt='收益为负者'
    else:
        try:
            num=int(printout)
        except (ValueError,TypeError):
            printout_txt='未知筛选方式'
        else:
            if len(df2) > abs(num):
                # str(num) so that an integer printout works as well
                if num > 0:
                    printout_txt='收益排名前'+str(num)+"名"
                else:
                    printout_txt='收益排名后'+str(abs(num))+"名"
            else:
                printout_txt='所有指数'

    titletxt="申万宏源行业/指数业绩龙虎榜:"+sw_level_txt+','+printout_txt

    footnote1="注:夏普/索梯诺比率基于"+ret_cn+",年化无风险利率"+str(round(RF*100,4))+'%'
    footnote2="评估期间:"+str(fromdate)+'至'+str(todate)+",数据来源:申万宏源,"+str(todaydt)+"制表"
    footnote=footnote1+'\n'+footnote2

    # Table font sizes: headings and data are 1px smaller than the title
    titile_font_size=font_size
    heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

    # Column order: the ranking indicator comes right after the identifiers
    df_prt['序号']=df_prt.index
    if indicator=='sharpe':
        value_cols=['夏普比率','索替诺比率',ret_cn,vol_cn,lpsd_cn]
    elif indicator=='sortino':
        value_cols=['索替诺比率','夏普比率',ret_cn,vol_cn,lpsd_cn]
    elif 'Volatility' in indicator:
        value_cols=[vol_cn,lpsd_cn,ret_cn,'夏普比率','索替诺比率']
    elif 'LPSD' in indicator:
        value_cols=[lpsd_cn,vol_cn,ret_cn,'夏普比率','索替诺比率']
    else:
        value_cols=[ret_cn,vol_cn,lpsd_cn,'夏普比率','索替诺比率']
    df_prt=df_prt[['序号','名称','代码']+value_cols]

    # Display the table
    df_display_CSS(df_prt,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                   first_col_align='center',second_col_align='left', \
                   last_col_align='center',other_col_align='center', \
                   titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                   data_font_size=data_font_size)

    return df2
3492
-
3493
-
3494
- #==============================================================================
3495
# Developer scratch example: executed only when this module is run as a script.
if __name__=='__main__':
    ticker='600791.SS'
    # The second assignment overrides the first: a name with Chinese characters
    ticker='东阿阿胶'

    contains_chinese(ticker)
3500
-
3501
def contains_chinese(text):
    """
    Tell whether *text* holds at least one character in the range
    U+4E00..U+9FFF. Returns True or False.
    """
    import re
    cjk_pattern = re.compile(r'[\u4e00-\u9fff]')
    first_hit = cjk_pattern.search(text)
    if first_hit is None:
        return False
    return True
3507
-
3508
- #==============================================================================
3509
# Developer scratch example: executed only when this module is run as a script.
if __name__=='__main__':
    ticker='600791.SS'
    ticker='689009.SS'

    # Final value: a list mixing a stock code and a stock name
    ticker=['600791.SS','东阿阿胶']
    level='1'

    find_industry_sw(ticker,level='1')
3517
-
3518
def find_industry_sw(ticker,level='1',ticker_order=True,max_sleep=30):
    """
    ===========================================================================
    Purpose: find the Shenwan industry of one stock or a group of stocks,
    given as stock codes and/or stock names.

    Parameters:
    ticker: a stock code/name or a list of them. For entries without Chinese
        characters only the first 6 characters are used as the code.
    level: '1' (default) searches level-1 industries; '2' and '3' search
        level-2/3 industries, which takes longer and may trigger the data
        source's anti-crawler protection.
    ticker_order: True (default) sorts the result in the order of `ticker`;
        False sorts it by industry code.
    max_sleep: upper bound in seconds of the random pause after each
        industry request (default 30); a value below 1 disables the pause.

    Returns:
    DataFrame with columns 序号/证券名称/证券代码/行业名称/行业代码, or None
    when a component request fails (typically because the server blocked
    this IP).
    """
    print(" Searching shenwan industries for securities ... ...")

    if isinstance(ticker,str):
        ticker=[ticker]

    # Codes are cut to 6 characters (dropping any suffix); names are kept
    tickerlist=[t if contains_chinese(t) else t[:6] for t in ticker]

    import akshare as ak
    if level == '3':
        df = ak.sw_index_third_info()
    elif level == '2':
        df = ak.sw_index_second_info()
    else:
        df = ak.sw_index_first_info()

    df['industry_code']=df['行业代码'].apply(lambda x: x[:6])
    industry_list=list(df['industry_code'])

    import pandas as pd; import random; import time
    columns=['序号','证券名称','证券代码','行业名称','行业代码']
    # Matches are collected as plain dicts and turned into a frame once at
    # the end; DataFrame.append no longer exists in pandas 2.x.
    rows=[]

    for i in industry_list:
        print_progress_percent2(i,industry_list,steps=10,leading_blanks=2)

        iname=df[df['industry_code']==i]['行业名称'].values[0]

        try:
            cdf = ak.index_component_sw(i)
        except Exception:
            print(" #Warning(find_industry_sw): server banned this ip becos of too many requests")
            print(" Solution: change to another ip or another computer, or try a few hours later.")
            return None

        for torder,t in enumerate(tickerlist,start=1):
            # Match names against the name column, codes against the code column
            key='证券名称' if contains_chinese(t) else '证券代码'
            dft=cdf[cdf[key]==t]
            if len(dft)==0: continue

            first=dft.iloc[0]
            rows.append({'序号':torder,'证券名称':first['证券名称'], \
                         '证券代码':first['证券代码'],'行业名称':iname,'行业代码':i})

        # Stop as soon as every security has been located
        if len(rows) == len(tickerlist): break

        # Random pause between requests, an attempt to stay below the
        # anti-crawler threshold
        if max_sleep >= 1:
            time.sleep(random.randint(1,int(max_sleep)))

    result=pd.DataFrame(rows,columns=columns)

    # Sort
    if not ticker_order:
        # by industry code
        result.sort_values(by='行业代码',inplace=True)
    else:
        # by the order of the input tickers
        result.sort_values(by='序号',inplace=True)

    # Display the result
    titletxt="证券所属行业:申万"+str(level)+"级行业"
    import datetime; todaydt = datetime.date.today()
    footnote="数据来源:申万宏源,"+str(todaydt)+"统计"

    print('')
    df_display_CSS(result,titletxt=titletxt,footnote=footnote,facecolor='papayawhip',decimals=2, \
                   first_col_align='center',second_col_align='left', \
                   last_col_align='left',other_col_align='left', \
                   titile_font_size='16px',heading_font_size='15px', \
                   data_font_size='15px')

    return result
3642
-
3643
- #==============================================================================
3644
- #东方财富板块分类:查找股票所属行业与主要同行
3645
- #==============================================================================
3646
if __name__ == '__main__':
    # Scratch inputs for trying out the East Money peer ranking interactively.
    ticker, indicator, rank = '600519.SS', "市盈率", 40
    font_size, facecolor = "16px", "papayawhip"

    # NOTE(review): the function defined right after this block is named
    # stock_peers_em; confirm that stock_industry_peer_em exists elsewhere.
    peers = stock_industry_peer_em(ticker, indicator="市盈率", rank=10)
3654
-
3655
def stock_peers_em(ticker='',indicator='市盈率',rank=10, \
                   force_show_stock=True, \
                   font_size="16px",facecolor="papayawhip", \
                   numberPerLine=5):
    """
    ===========================================================================
    Purpose: based on the East Money industry classification, find the industry
        board a stock belongs to and rank the stock against its peers.
    Remark: the industry classification is rather coarse.
    Main parameters:
        ticker: stock code; only its first 6 characters are passed to akshare.
            With the default '' (or whenever the stock lookup fails) the names
            of all East Money industry boards are printed and None is returned.
        indicator: ranking indicator, default '市盈率'. It is matched as a
            substring against the column names of the board constituent table
            (e.g. "市净率", "市盈率"). The aliases "股价" / "流动性" and their
            synonyms are mapped to "最新价" / "换手率" respectively.
        rank: number of rows displayed, default 10. A positive value shows the
            top rows, a negative value the bottom rows; 0 is treated as 5.
        force_show_stock: default True. If the stock is not among the rows
            selected by rank, a window of rows around the stock is displayed
            instead.
        font_size: title font size of the table, default "16px"; headings and
            data are shown one pixel smaller.
        facecolor: background colour of the table, default "papayawhip".
        numberPerLine: number of board names per line when the list of boards
            is printed, default 5.

    Returns: the constituent DataFrame of the board, sorted by the indicator
        (descending) and then by stock code, with a regenerated '序号' column;
        None if only the board list or the supported indicators were printed.

    Note: if the result looks abnormal, try upgrading akshare.

    Examples:
        industries=stock_peers_em()     # list all East Money industry boards
        # rank 002373.SZ by price within its East Money industry board
        peers=stock_peers_em(ticker="002373.SZ",
                             indicator="股价",
                             force_show_stock=True)
        peers=stock_peers_em(ticker="002373.SZ",
                             indicator="流动性",
                             force_show_stock=True)
    """
    # Map user-friendly aliases onto the column names used by the data source
    if indicator in ["股价","股票价格","价格","收盘价","价位"]:
        indicator="最新价"
    if indicator in ["流动性","热门","活跃"]:
        indicator="换手率"

    if rank==0:
        rank=5

    import akshare as ak

    ticker6=ticker[:6]

    # Basic stock information
    try:
        info=ak.stock_individual_info_em(symbol=ticker6)
    except Exception:
        # Only ordinary errors are handled here, so that Ctrl-C and SystemExit
        # still propagate instead of being turned into a board listing.
        if not ticker=='':
            print(" #Warning(stock_peer_em): stock info not found for",ticker)
            print(" Solution: if stock code is correct, upgrade akshare and try again")

        # Fall back to listing every industry board
        df_em=ak.stock_board_industry_name_em()
        industry_names_em=list(df_em["板块名称"])
        print(" List of stock industries in East Money:",len(df_em),end='')
        printInLine_md(industry_names_em,numberPerLine=numberPerLine,colalign="center")
        return None

    # Name of the East Money industry board of the stock
    hangye=info[info['item']=='行业']['value'].values[0]

    # Constituents of that board
    cfg=ak.stock_board_industry_cons_em(symbol=hangye)

    # First column whose name contains the requested indicator
    cfg_col_list=list(cfg)
    indicator_col=''
    for i in cfg_col_list:
        if indicator in i:
            indicator_col=i
            break

    if indicator_col=='' or indicator=='':
        if indicator!='':
            print(" #Warning(stock_peer_em): unsupported indicator",indicator)

        remove_list=["序号","代码","名称"]
        cfg_col_list_tmp = [x for x in cfg_col_list if x not in remove_list]
        print(" Supported indicators:",)
        print_list(cfg_col_list_tmp,leading_blanks=2)
        return None

    collist=['序号','名称','代码','市盈率-动态','市净率','最新价','涨跌幅','换手率']
    if not indicator_col in collist:
        collist=collist+[indicator_col]

    # Re-rank: indicator descending, ties broken by stock code ascending
    cfg.sort_values(by=[indicator_col]+["代码"],ascending=[False,True],inplace=True)
    cfg.reset_index(drop=True,inplace=True)
    cfg['序号']=cfg.index+1

    # Industry-wide statistics
    indicator_mean=cfg[indicator_col].mean()
    indicator_median=cfg[indicator_col].median()
    indicator_total=cfg["代码"].count()

    # Position of the stock itself within the board
    stock_rows=cfg[cfg['代码']==ticker6]
    indicator_value=stock_rows[indicator_col].values[0]
    indicator_seq=stock_rows["序号"].values[0]
    from scipy.stats import percentileofscore
    indicator_pct=percentileofscore(cfg[indicator_col],indicator_value)

    # Select the rows to display
    if rank > 0:
        rank_flag="前"
        df_disp=cfg[collist].head(rank)
    else:
        rank_flag="后"
        df_disp=cfg[collist].tail(abs(rank))

    # Force the selected stock into view
    if force_show_stock:
        if not ticker6 in list(df_disp["代码"]):
            # Window of rows around the stock (its rank is already known)
            seq1=indicator_seq-4; seq2=indicator_seq+5
            # Window runs past the beginning
            if seq1 <=0:
                seq1=1; seq2=10
            # Window runs past the end
            if seq2 > len(cfg):
                seq2=len(cfg); seq1=len(cfg)-9

            # Element-wise &, not the boolean operator "and"
            df_disp=cfg[(cfg["序号"]>=seq1) & (cfg["序号"]<=seq2)][collist]

    # Build the table
    tname=ticker_name(ticker)
    titletxt="行业板块及其上市公司排名:"+hangye+","+indicator_col+"("+rank_flag+str(abs(rank))+"名)"

    footnote1="全行业的"+indicator_col+"状况:均值"+str(round(indicator_mean,2))+",中位数"+str(round(indicator_median,2))+"\n"
    footnote2=tname+"的"+indicator_col+":"+str(round(indicator_value,2))+",行业排名"+str(indicator_seq)+"/"+str(indicator_total)+",分位"+str(round(indicator_pct,2))+"%\n"
    import datetime; stoday = datetime.date.today()
    footnote3="*** 信息来源:东方财富,"+str(stoday)
    footnote=footnote1+footnote2+footnote3

    # Headings and data are one pixel smaller than the title
    titile_font_size=font_size
    heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

    df_display_CSS(df_disp,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                   first_col_align='center',second_col_align='left', \
                   titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                   data_font_size=data_font_size)

    return cfg
3799
-
3800
- #==============================================================================
3801
if __name__ == '__main__':
    # Scratch inputs for trying out concept_stocks_em interactively.
    # Keywords tried earlier: '酒', '股'.
    concept = '白酒'

    ticker, indicator, rank = "600519.SS", "市盈率", 11
    force_show_stock = True
    font_size, facecolor, numberPerLine = "16px", "papayawhip", 7
3813
-
3814
-
3815
def concept_stocks_em(concept='',ticker='',indicator="市盈率",rank=10, \
                      force_show_stock=False, \
                      font_size="16px",facecolor="papayawhip",numberPerLine=5):
    """
    ===========================================================================
    Purpose: based on the East Money concept boards, find the concepts matching
        a keyword and rank the stocks of a concept board by an indicator.
    Remark: concept boards are fine-grained and one stock may belong to several
        boards, unlike the industry classification.
    Parameters:
        concept: concept board name, or a keyword contained in the name. With
            the default '' all concept board names are printed. If several
            names match, the matching names are printed; if exactly one board
            matches, its stocks are ranked by indicator.
        ticker: stock code, default ''. Only its first 6 characters are used.
        indicator: indicator name, default "市盈率". It is matched as a
            substring against the column names of the constituent table; the
            aliases "股价" / "流动性" and their synonyms are mapped to
            "最新价" / "换手率" respectively.
        rank: number of rows displayed, default 10. Positive shows the top
            rows, negative the bottom rows; 0 is treated as 5.
        force_show_stock: default False. If ticker is found in the board but
            lies outside the rows selected by rank, display a window of rows
            around the stock instead. Ignored when the stock is not in the
            board.
        font_size: title font size, default "16px"; headings and data are
            shown one pixel smaller.
        facecolor: background colour of the table, default "papayawhip".
        numberPerLine: number of names per line when board names are printed,
            default 5.

    Returns: the constituent DataFrame of the board, sorted by the indicator
        (descending) and then by stock code; None when only names, supported
        indicators or a warning were printed.
    """
    # Map user-friendly aliases onto the column names used by the data source
    if indicator in ["股价","股票价格","价格","收盘价","价位"]:
        indicator="最新价"
    if indicator in ["流动性","热门","活跃"]:
        indicator="换手率"

    if rank==0:
        rank=5

    import akshare as ak

    # Fetch all concept boards
    try:
        concept_df = ak.stock_board_concept_name_em()
    except Exception:
        print(" #Warning(concept_stocks_em): data source is unaccessible, check network")
        return None

    concept_list=list(concept_df["板块名称"])
    # Board names containing "昨日" are excluded
    concept_list2 = [x for x in concept_list if "昨日" not in x]
    concept_list_tmp = [x for x in concept_list2 if concept in x]

    # Nothing found (or nothing requested): list every available concept
    if len(concept_list_tmp)==0 or concept=='':
        if concept!='':
            print(" #Warning(concept_stocks_em): concept not found with",concept)
        print(" Available concepts in East Money:",len(concept_list2),end='')
        printInLine_md(concept_list2,numberPerLine=numberPerLine,colalign="center")

        return None

    # Several boards found: list them and stop. A short list is printed
    # plainly, a longer one (numberPerLine or more) as a multi-column table.
    if len(concept_list_tmp)>1:
        if len(concept_list_tmp)<numberPerLine:
            print(" Concepts found in East Money:",end='')
            print_list(concept_list_tmp,leading_blanks=2)
        else:
            print(" Concepts found in East Money:",len(concept_list_tmp),end='')
            printInLine_md(concept_list_tmp,numberPerLine=numberPerLine,colalign="center")

        return None

    # Exactly one board found: detailed processing
    concept=concept_list_tmp[0]

    # Constituents of the concept board
    cfg=ak.stock_board_concept_cons_em(concept)

    # First column whose name contains the requested indicator
    cfg_col_list=list(cfg)
    indicator_col=''
    for i in cfg_col_list:
        if indicator in i:
            indicator_col=i
            break

    if indicator_col=='':
        print(" #Warning(concept_stocks_em): unsupported indicator",indicator)

        remove_list=["序号","代码","名称"]
        cfg_col_list_tmp = [x for x in cfg_col_list if x not in remove_list]
        print(" Supported indicators:",)
        print_list(cfg_col_list_tmp,leading_blanks=2)

        return None

    collist=['序号','名称','代码','市盈率-动态','市净率','最新价','涨跌幅','换手率']
    if not indicator_col in collist:
        collist=collist+[indicator_col]

    # Re-rank: indicator descending, ties broken by stock code ascending
    cfg.sort_values(by=[indicator_col]+["代码"],ascending=[False,True],inplace=True)
    cfg.reset_index(drop=True,inplace=True)
    cfg['序号']=cfg.index+1

    # Board-wide statistics
    indicator_mean=cfg[indicator_col].mean()
    indicator_median=cfg[indicator_col].median()
    indicator_total=cfg["代码"].count()

    # Is the requested stock a constituent of this board?
    ticker6=ticker[:6]
    found_stock=False
    stock_list=list(cfg['代码'])
    if ticker6 in stock_list:
        found_stock=True
    else:
        if not ticker=='':
            print(" #Warning(concept_stocks_em): stock not found for",ticker)

    if found_stock:
        stock_rows=cfg[cfg['代码']==ticker6]
        indicator_value=stock_rows[indicator_col].values[0]
        indicator_seq=stock_rows["序号"].values[0]
        from scipy.stats import percentileofscore
        indicator_pct=percentileofscore(cfg[indicator_col],indicator_value)

    # Select the rows to display
    if rank > 0:
        rank_flag="前"
        df_disp=cfg[collist].head(rank)
    else:
        rank_flag="后"
        df_disp=cfg[collist].tail(abs(rank))

    # Force the selected stock into view. This only makes sense when the stock
    # is a constituent; otherwise there is no rank to centre the window on.
    if force_show_stock and found_stock:
        if not ticker6 in list(df_disp["代码"]):
            seq1=indicator_seq-4; seq2=indicator_seq+5
            # Window runs past the beginning
            if seq1 <=0:
                seq1=1; seq2=10
            # Window runs past the end
            if seq2 > len(cfg):
                seq2=len(cfg); seq1=len(cfg)-9

            # Element-wise &, not the boolean operator "and"
            df_disp=cfg[(cfg["序号"]>=seq1) & (cfg["序号"]<=seq2)][collist]

    # Build the table
    titletxt="概念板块及其上市公司排名:"+concept+","+indicator_col+"("+rank_flag+str(abs(rank))+"名)"

    footnote1="概念板块的"+indicator_col+"整体状况:均值"+str(round(indicator_mean,2))+",中位数"+str(round(indicator_median,2))+"\n"
    footnote2=''
    if found_stock:
        tname=ticker_name(ticker)
        footnote2=tname+"的"+indicator_col+":"+str(round(indicator_value,2))+",板块排名"+str(indicator_seq)+"/"+str(indicator_total)+",分位"+str(round(indicator_pct,2))+"%\n"
    else:
        footnote2="概念板块:"+concept+",成分股数量"+str(len(cfg))+'\n'

    import datetime; stoday = datetime.date.today()
    footnote3="*** 信息来源:东方财富,"+str(stoday)
    # The stock line follows the board statistics; the board summary precedes them
    if found_stock:
        footnote=footnote1+footnote2+footnote3
    else:
        footnote=footnote2+footnote1+footnote3

    # Headings and data are one pixel smaller than the title
    titile_font_size=font_size
    heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'

    df_display_CSS(df_disp,titletxt=titletxt,footnote=footnote,facecolor=facecolor, \
                   first_col_align='center',second_col_align='left', \
                   titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
                   data_font_size=data_font_size)

    return cfg
3987
-
3988
-
3989
-
3990
-
3991
- #==============================================================================
3992
if __name__ == '__main__':
    # Sample ticker for a manual check; '600519.SS' was tried earlier.
    ticker = '600305.SS'

    get_stock_industry_sw(ticker)
3997
-
3998
def get_stock_industry_sw(ticker, timeout=10):
    """
    ===========================================================================
    Purpose: scrape the industry classification name of a stock from the Sina
        Finance company-information page (used by callers as the Shenwan
        industry name).
    Parameters:
        ticker: A-share stock code; only its first 6 characters are used.
        timeout: seconds to wait for the web server before giving up,
            default 10. Without a timeout a stalled connection would block
            the caller indefinitely.

    Returns: the text of the first cell in the third row of the page's
        "comInfo1" table; '' if the HTTP status is not 200 or the expected
        table, row or cell is missing from the page.

    Raises: requests exceptions (including timeouts) are not caught here.
    """

    import requests
    from bs4 import BeautifulSoup

    ticker6=ticker[:6]
    url=f"https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpOtherInfo/stockid/{ticker6}/menu_num/2.phtml"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    response = requests.get(url,headers=headers,timeout=timeout)

    if response.status_code != 200:
        return ''

    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when the page layout differs from what is expected
    table = soup.find('table',class_="comInfo1")
    if table is None:
        return ''

    rows = table.find_all("tr")
    if len(rows) < 3:
        return ''

    cell = rows[2].find("td")
    if cell is None:
        return ''

    return cell.text
4027
-
4028
- #==============================================================================
4029
if __name__ == '__main__':
    # Sample ticker for a manual check; '600519.SS' was tried earlier.
    ticker = '600305.SS'

    stock_peers_sw(ticker)
4034
-
4035
def stock_peers_sw(ticker):
    """
    ===========================================================================
    Purpose: show the Shenwan industry classification of a stock together with
        its peers, and report the stock's position within the industry list.
    Parameters:
        ticker: A-share stock code; its first 6 characters are used to locate
            the stock among the industry components.
    Returns: None. Results and warnings are printed.
    """

    try:
        hangye=get_stock_industry_sw(ticker)
    except Exception:
        print(" #Warning(stock_peers_sw): industry info not found for",ticker)
        return

    if hangye=='':
        print(" #Warning(stock_peers_sw): found empty industry for",ticker)
        return

    # Try the level-3 industry name first, then level 2, then the bare name;
    # the first lookup that does not raise wins.
    ilist=[]; hangye_final=''
    for candidate in (hangye+'Ⅲ', hangye+'Ⅱ', hangye):
        try:
            ilist=print_industry_component_sw(iname=candidate,return_result=True)
        except Exception:
            continue
        hangye_final=candidate
        break
    else:
        print("\n #Warning(stock_peers_sw): failed to search peers for",ticker)
        print(" Possible solutions:")
        print(" Try first: upgrade akshare, restart Jupyter and try again")

    # Locate the stock within the industry components. The truthiness test
    # also covers a lookup that returned None or an empty list.
    ticker_item=''
    if ilist:
        ticker6=ticker[:6]
        for pos,item in enumerate(ilist,start=1):
            if ticker6 in item:
                ticker_item=item
                ticker_pos=pos
                break

    if ticker_item != '':
        footnote0="注:"
        footnote1=ticker_item+"在申万行业"+hangye_final+"指数中的权重排名为"+str(ticker_pos)+'/'+str(len(ilist))
        footnote2="该指数的权重排名依据主要包括公司的市值规模、流动性以及市场代表性"
        footnote=footnote0+'\n'+footnote1+'\n'+footnote2
        print(footnote)
    return
4092
-
4093
- #==============================================================================
4094
if __name__ == '__main__':
    # Earlier sample: ['绩优股指数','大盘指数','中市盈率指数','高市净率指数',]
    sw_index = ['大类风格-先进制造', '大类风格--医药医疗']

    index_intersection_sw(sw_index)
4099
-
4100
def index_intersection_sw(sw_index=[]):
    """
    ===========================================================================
    Purpose: find the component stocks shared by several Shenwan indexes.
    Main parameters:
        sw_index: list of Shenwan index names (intended to hold at least two);
            a single string is treated as a one-item list.
    Returns: None. The component lists that could be fetched are handed to
        list_intersection.
    Example:
        sw_index=['绩优股指数','大盘指数','中市盈率指数','高市净率指数']
    """
    # Nothing to intersect
    if not len(sw_index):
        print(" #Warning(stock_intersection_sw): no index found for intersection")
        return

    index_names=[sw_index] if isinstance(sw_index,str) else sw_index

    # Collect one component list per index, skipping indexes whose lookup fails
    component_lists=[]
    for index_name in index_names:
        try:
            components=print_industry_component_sw(index_name,printout=False,return_result=True)
        except:
            print(" #Warning(stock_intersection_sw): failed to find component for index",index_name)
        else:
            component_lists.append(components)

    list_intersection(component_lists)

    return
4134
- #==============================================================================
4135
- #==============================================================================
4136
- #==============================================================================
4137
- #==============================================================================
4138
- #==============================================================================
4139
- #==============================================================================
4140
-