siat 3.10.131__py3-none-any.whl → 3.10.132__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. build/lib/build/lib/siat/__init__.py +75 -0
  2. build/lib/build/lib/siat/allin.py +137 -0
  3. build/lib/build/lib/siat/assets_liquidity.py +915 -0
  4. build/lib/build/lib/siat/beta_adjustment.py +1058 -0
  5. build/lib/build/lib/siat/beta_adjustment_china.py +548 -0
  6. build/lib/build/lib/siat/blockchain.py +143 -0
  7. build/lib/build/lib/siat/bond.py +2900 -0
  8. build/lib/build/lib/siat/bond_base.py +992 -0
  9. build/lib/build/lib/siat/bond_china.py +100 -0
  10. build/lib/build/lib/siat/bond_zh_sina.py +143 -0
  11. build/lib/build/lib/siat/capm_beta.py +783 -0
  12. build/lib/build/lib/siat/capm_beta2.py +887 -0
  13. build/lib/build/lib/siat/common.py +5360 -0
  14. build/lib/build/lib/siat/compare_cross.py +642 -0
  15. build/lib/build/lib/siat/copyrights.py +18 -0
  16. build/lib/build/lib/siat/cryptocurrency.py +667 -0
  17. build/lib/build/lib/siat/economy.py +1471 -0
  18. build/lib/build/lib/siat/economy2.py +1853 -0
  19. build/lib/build/lib/siat/esg.py +536 -0
  20. build/lib/build/lib/siat/event_study.py +815 -0
  21. build/lib/build/lib/siat/fama_french.py +1521 -0
  22. build/lib/build/lib/siat/fin_stmt2_yahoo.py +982 -0
  23. build/lib/build/lib/siat/financial_base.py +1160 -0
  24. build/lib/build/lib/siat/financial_statements.py +598 -0
  25. build/lib/build/lib/siat/financials.py +2339 -0
  26. build/lib/build/lib/siat/financials2.py +1278 -0
  27. build/lib/build/lib/siat/financials_china.py +4433 -0
  28. build/lib/build/lib/siat/financials_china2.py +2212 -0
  29. build/lib/build/lib/siat/fund.py +629 -0
  30. build/lib/build/lib/siat/fund_china.py +3307 -0
  31. build/lib/build/lib/siat/future_china.py +551 -0
  32. build/lib/build/lib/siat/google_authenticator.py +47 -0
  33. build/lib/build/lib/siat/grafix.py +3636 -0
  34. build/lib/build/lib/siat/holding_risk.py +867 -0
  35. build/lib/build/lib/siat/luchy_draw.py +638 -0
  36. build/lib/build/lib/siat/market_china.py +1168 -0
  37. build/lib/build/lib/siat/markowitz.py +2363 -0
  38. build/lib/build/lib/siat/markowitz2.py +3150 -0
  39. build/lib/build/lib/siat/markowitz2_20250704.py +2969 -0
  40. build/lib/build/lib/siat/markowitz2_20250705.py +3158 -0
  41. build/lib/build/lib/siat/markowitz_simple.py +373 -0
  42. build/lib/build/lib/siat/ml_cases.py +2291 -0
  43. build/lib/build/lib/siat/ml_cases_example.py +60 -0
  44. build/lib/build/lib/siat/option_china.py +3069 -0
  45. build/lib/build/lib/siat/option_pricing.py +1925 -0
  46. build/lib/build/lib/siat/other_indexes.py +409 -0
  47. build/lib/build/lib/siat/risk_adjusted_return.py +1576 -0
  48. build/lib/build/lib/siat/risk_adjusted_return2.py +1900 -0
  49. build/lib/build/lib/siat/risk_evaluation.py +2218 -0
  50. build/lib/build/lib/siat/risk_free_rate.py +351 -0
  51. build/lib/build/lib/siat/sector_china.py +4140 -0
  52. build/lib/build/lib/siat/security_price2.py +727 -0
  53. build/lib/build/lib/siat/security_prices.py +3408 -0
  54. build/lib/build/lib/siat/security_trend.py +402 -0
  55. build/lib/build/lib/siat/security_trend2.py +646 -0
  56. build/lib/build/lib/siat/stock.py +4284 -0
  57. build/lib/build/lib/siat/stock_advice_linear.py +934 -0
  58. build/lib/build/lib/siat/stock_base.py +26 -0
  59. build/lib/build/lib/siat/stock_china.py +2095 -0
  60. build/lib/build/lib/siat/stock_prices_kneighbors.py +910 -0
  61. build/lib/build/lib/siat/stock_prices_linear.py +386 -0
  62. build/lib/build/lib/siat/stock_profile.py +707 -0
  63. build/lib/build/lib/siat/stock_technical.py +3305 -0
  64. build/lib/build/lib/siat/stooq.py +74 -0
  65. build/lib/build/lib/siat/transaction.py +347 -0
  66. build/lib/build/lib/siat/translate.py +5183 -0
  67. build/lib/build/lib/siat/valuation.py +1378 -0
  68. build/lib/build/lib/siat/valuation_china.py +2076 -0
  69. build/lib/build/lib/siat/var_model_validation.py +444 -0
  70. build/lib/build/lib/siat/yf_name.py +811 -0
  71. build/lib/siat/__init__.py +75 -0
  72. build/lib/siat/allin.py +137 -0
  73. build/lib/siat/assets_liquidity.py +915 -0
  74. build/lib/siat/beta_adjustment.py +1058 -0
  75. build/lib/siat/beta_adjustment_china.py +548 -0
  76. build/lib/siat/blockchain.py +143 -0
  77. build/lib/siat/bond.py +2900 -0
  78. build/lib/siat/bond_base.py +992 -0
  79. build/lib/siat/bond_china.py +100 -0
  80. build/lib/siat/bond_zh_sina.py +143 -0
  81. build/lib/siat/capm_beta.py +783 -0
  82. build/lib/siat/capm_beta2.py +887 -0
  83. build/lib/siat/common.py +5360 -0
  84. build/lib/siat/compare_cross.py +642 -0
  85. build/lib/siat/copyrights.py +18 -0
  86. build/lib/siat/cryptocurrency.py +667 -0
  87. build/lib/siat/economy.py +1471 -0
  88. build/lib/siat/economy2.py +1853 -0
  89. build/lib/siat/esg.py +536 -0
  90. build/lib/siat/event_study.py +815 -0
  91. build/lib/siat/fama_french.py +1521 -0
  92. build/lib/siat/fin_stmt2_yahoo.py +982 -0
  93. build/lib/siat/financial_base.py +1160 -0
  94. build/lib/siat/financial_statements.py +598 -0
  95. build/lib/siat/financials.py +2339 -0
  96. build/lib/siat/financials2.py +1278 -0
  97. build/lib/siat/financials_china.py +4433 -0
  98. build/lib/siat/financials_china2.py +2212 -0
  99. build/lib/siat/fund.py +629 -0
  100. build/lib/siat/fund_china.py +3307 -0
  101. build/lib/siat/future_china.py +551 -0
  102. build/lib/siat/google_authenticator.py +47 -0
  103. build/lib/siat/grafix.py +3636 -0
  104. build/lib/siat/holding_risk.py +867 -0
  105. build/lib/siat/luchy_draw.py +638 -0
  106. build/lib/siat/market_china.py +1168 -0
  107. build/lib/siat/markowitz.py +2363 -0
  108. build/lib/siat/markowitz2.py +3150 -0
  109. build/lib/siat/markowitz2_20250704.py +2969 -0
  110. build/lib/siat/markowitz2_20250705.py +3158 -0
  111. build/lib/siat/markowitz_simple.py +373 -0
  112. build/lib/siat/ml_cases.py +2291 -0
  113. build/lib/siat/ml_cases_example.py +60 -0
  114. build/lib/siat/option_china.py +3069 -0
  115. build/lib/siat/option_pricing.py +1925 -0
  116. build/lib/siat/other_indexes.py +409 -0
  117. build/lib/siat/risk_adjusted_return.py +1576 -0
  118. build/lib/siat/risk_adjusted_return2.py +1900 -0
  119. build/lib/siat/risk_evaluation.py +2218 -0
  120. build/lib/siat/risk_free_rate.py +351 -0
  121. build/lib/siat/sector_china.py +4140 -0
  122. build/lib/siat/security_price2.py +727 -0
  123. build/lib/siat/security_prices.py +3408 -0
  124. build/lib/siat/security_trend.py +402 -0
  125. build/lib/siat/security_trend2.py +646 -0
  126. build/lib/siat/stock.py +4284 -0
  127. build/lib/siat/stock_advice_linear.py +934 -0
  128. build/lib/siat/stock_base.py +26 -0
  129. build/lib/siat/stock_china.py +2095 -0
  130. build/lib/siat/stock_prices_kneighbors.py +910 -0
  131. build/lib/siat/stock_prices_linear.py +386 -0
  132. build/lib/siat/stock_profile.py +707 -0
  133. build/lib/siat/stock_technical.py +3305 -0
  134. build/lib/siat/stooq.py +74 -0
  135. build/lib/siat/transaction.py +347 -0
  136. build/lib/siat/translate.py +5183 -0
  137. build/lib/siat/valuation.py +1378 -0
  138. build/lib/siat/valuation_china.py +2076 -0
  139. build/lib/siat/var_model_validation.py +444 -0
  140. build/lib/siat/yf_name.py +811 -0
  141. siat/__init__.py +0 -0
  142. siat/allin.py +0 -0
  143. siat/assets_liquidity.py +0 -0
  144. siat/beta_adjustment.py +0 -0
  145. siat/beta_adjustment_china.py +0 -0
  146. siat/blockchain.py +0 -0
  147. siat/bond.py +0 -0
  148. siat/bond_base.py +0 -0
  149. siat/bond_china.py +0 -0
  150. siat/bond_zh_sina.py +0 -0
  151. siat/capm_beta.py +0 -0
  152. siat/capm_beta2.py +0 -0
  153. siat/common.py +136 -3
  154. siat/compare_cross.py +0 -0
  155. siat/copyrights.py +0 -0
  156. siat/cryptocurrency.py +0 -0
  157. siat/economy.py +0 -0
  158. siat/economy2.py +0 -0
  159. siat/esg.py +0 -0
  160. siat/event_study.py +0 -0
  161. siat/exchange_bond_china.pickle +0 -0
  162. siat/fama_french.py +0 -0
  163. siat/fin_stmt2_yahoo.py +0 -0
  164. siat/financial_base.py +0 -0
  165. siat/financial_statements.py +0 -0
  166. siat/financials.py +0 -0
  167. siat/financials2.py +0 -0
  168. siat/financials_china.py +0 -0
  169. siat/financials_china2.py +0 -0
  170. siat/fund.py +0 -0
  171. siat/fund_china.pickle +0 -0
  172. siat/fund_china.py +0 -0
  173. siat/future_china.py +0 -0
  174. siat/google_authenticator.py +0 -0
  175. siat/grafix.py +1 -1
  176. siat/holding_risk.py +0 -0
  177. siat/luchy_draw.py +0 -0
  178. siat/market_china.py +1 -1
  179. siat/markowitz.py +0 -0
  180. siat/markowitz2.py +240 -39
  181. siat/markowitz2_20250704.py +2969 -0
  182. siat/markowitz2_20250705.py +3158 -0
  183. siat/markowitz_simple.py +0 -0
  184. siat/ml_cases.py +0 -0
  185. siat/ml_cases_example.py +0 -0
  186. siat/option_china.py +0 -0
  187. siat/option_pricing.py +0 -0
  188. siat/other_indexes.py +0 -0
  189. siat/risk_adjusted_return.py +0 -0
  190. siat/risk_adjusted_return2.py +0 -0
  191. siat/risk_evaluation.py +0 -0
  192. siat/risk_free_rate.py +0 -0
  193. siat/sector_china.py +0 -0
  194. siat/security_price2.py +0 -0
  195. siat/security_prices.py +3 -1
  196. siat/security_trend.py +0 -0
  197. siat/security_trend2.py +1 -1
  198. siat/stock.py +4 -2
  199. siat/stock_advice_linear.py +0 -0
  200. siat/stock_base.py +0 -0
  201. siat/stock_china.py +0 -0
  202. siat/stock_info.pickle +0 -0
  203. siat/stock_prices_kneighbors.py +0 -0
  204. siat/stock_prices_linear.py +0 -0
  205. siat/stock_profile.py +0 -0
  206. siat/stock_technical.py +0 -0
  207. siat/stooq.py +0 -0
  208. siat/transaction.py +0 -0
  209. siat/translate.py +11 -11
  210. siat/valuation.py +0 -0
  211. siat/valuation_china.py +0 -0
  212. siat/var_model_validation.py +0 -0
  213. siat/yf_name.py +0 -0
  214. {siat-3.10.131.dist-info → siat-3.10.132.dist-info}/METADATA +235 -227
  215. siat-3.10.132.dist-info/RECORD +218 -0
  216. {siat-3.10.131.dist-info → siat-3.10.132.dist-info}/WHEEL +1 -1
  217. {siat-3.10.131.dist-info → siat-3.10.132.dist-info/licenses}/LICENSE +0 -0
  218. siat-3.10.132.dist-info/top_level.txt +4 -0
  219. siat-3.10.131.dist-info/RECORD +0 -76
  220. siat-3.10.131.dist-info/top_level.txt +0 -1
@@ -0,0 +1,2095 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 模块功能:借助机器学习学习方法,预测次日股票价格走势,仅适用于中国大陆股票
4
+ 模型:最近邻模型
5
+ 算法:借助个股过去一百个交易日的资金净流入/净流出以及大盘走势变化,进行机器学习
6
+ 注意:如果在当日未收盘时运行,预测的是当日个股收盘价的走势;若在收盘后运行则预测次日走势
7
+ 作者:王德宏,北京外国语大学国际商学院
8
+ 日期:2021-5-13
9
+ """
10
+ #==============================================================================
11
+ import warnings; warnings.filterwarnings('ignore')
12
+
13
+ from siat.common import *
14
+ from siat.translate import *
15
+ from siat.grafix import *
16
+ from siat.security_prices import *
17
+ from siat.security_price2 import *
18
+ #==============================================================================
19
+ # 获得个股近一百个交易日的资金净流入数据
20
+ #==============================================================================
21
+
22
# Interactive scratch cell: sample calls of get_money_flowin. It only runs when
# the module is executed directly, never on import.
if __name__=='__main__':
    ticker='600519.Ss'  # mixed-case suffix is fine: get_money_flowin upper-cases the ticker before parsing

    # Full available history
    dfp=get_money_flowin(ticker)

    # History truncated at a given end date
    end='2023-6-15'
    dfp=get_money_flowin(ticker,end)
29
+
30
def get_money_flowin(ticker,end='latest'):
    """
    Download the daily money-flow records of one Shanghai/Shenzhen stock and
    join them with the stock's own prices and the matching market index.

    Parameters:
        ticker: stock code, optionally followed by an exchange suffix
            ('.SS' or '.SZ', case-insensitive). Without a suffix, codes
            starting with 0/2/3 are treated as Shenzhen, all others as Shanghai.
        end: 'latest' keeps every record. Otherwise the value is passed to
            check_date2; if it is accepted, only rows whose 'date' is on or
            before it are kept, and if it is rejected no filtering is done.

    Returns:
        A DataFrame indexed by date with the close, daily change, OHLC prices,
        log volume, net-inflow ratios, signed-log net-inflow amounts and the
        market index close / log volume; or None when the exchange is not
        supported or any of the required data cannot be obtained.
    """
    import math

    code=ticker.upper()[:6]
    exch=ticker.upper()[7:9]

    # Decide the market first, so an unsupported ticker is rejected before
    # the data libraries are loaded or the network is touched
    if exch=='':
        # Shenzhen codes start with 0/2/3, Shanghai codes with 6/9.
        # ticker[:1] (not ticker[0]) keeps an empty ticker from raising here.
        market='sz' if ticker[:1] in ['0','2','3'] else 'sh'
    elif exch=='SS':
        market='sh'
    elif exch=='SZ':
        market='sz'
    else:
        print(" #Warning(get_money_flowin): currently only support Shanghai/Shenzhen stock exchanges")
        return None

    import akshare as ak
    import pandas as pd

    def _signed_log(v):
        # log of the magnitude with the original sign; zero stays zero
        if v>0: return math.log(v)
        if v<0: return -math.log(-v)
        return 0

    def _volume_log(v):
        # a zero (or negative) volume has no logarithm; map it to 0 instead of raising
        if v<=0: return 0
        return math.log(v)

    # Money-flow details of the individual stock
    try:
        df = ak.stock_individual_fund_flow(stock=code, market=market)
    except Exception:
        print(" #Error(get_money_flowin): money flow info unavailable for",ticker)
        return None
    if df is None or len(df)==0:
        print(" #Error(get_money_flowin): money flow info unavailable for",ticker)
        return None

    df['ticker']=ticker

    df['Date']=pd.to_datetime(df['日期'])    # timezone-naive dates
    df.set_index('Date',inplace=True)
    df['date']=df['日期']

    df['Close']=df['收盘价'].apply(float)
    df['Change%']=df['涨跌幅'].apply(float)

    # (English suffix, prefix of the source column) for every order-size class
    sizes=[('main','主力'),('small','小单'),('mid','中单'),('big','大单'),('super','超大单')]

    for en,cn in sizes:
        df['netFlowInAmount_'+en]=df[cn+'净流入-净额'].apply(float)

    # Total net inflow: may be positive or negative
    df['netFlowInAmount']=df['netFlowInAmount_main']+df['netFlowInAmount_small']+ \
                          df['netFlowInAmount_mid']+df['netFlowInAmount_big']+ \
                          df['netFlowInAmount_super']

    for en,cn in sizes:
        df['netFlowInRatio%_'+en]=df[cn+'净流入-净占比'].apply(float)

    # Important: keep records with missing values (filled with 0) so that the
    # current, not yet closed, trading day can still be used for prediction
    df.fillna(0,inplace=True)

    # Amounts are orders of magnitude larger than the other features; take a
    # signed logarithm so they do not dominate them
    amtColList=['netFlowInAmount_main','netFlowInAmount_small','netFlowInAmount_mid', \
                'netFlowInAmount_big','netFlowInAmount_super','netFlowInAmount']
    for a in amtColList:
        df[a+'_ln']=df[a].apply(_signed_log)

    # Drop the raw amounts and the source columns that are no longer needed
    df.drop(amtColList,axis=1,inplace=True)
    dfdroplist=[cn+'净流入-净额' for _,cn in sizes]+ \
               [cn+'净流入-净占比' for _,cn in sizes]+ \
               ['收盘价','涨跌幅','日期']
    df.drop(labels=dfdroplist,axis=1,inplace=True)

    df.sort_index(ascending=True,inplace=True)
    fromdate=df['date'].values[0]
    todate=df['date'].values[-1]

    # Market index and its volume
    dpindex="000001.SS"                        # SSE Composite Index
    if market == 'sz': dpindex="399001.SZ"     # SZSE Component Index
    df_dp_tmp=get_price(dpindex,fromdate,todate)
    if df_dp_tmp is None or len(df_dp_tmp)==0:
        print(" #Error(get_money_flowin): market index prices unavailable for",dpindex)
        return None
    df_dp=pd.DataFrame({'Volume_mkt':df_dp_tmp['Volume'].apply(_volume_log), \
                        'Close_mkt':df_dp_tmp['Close']})

    # Prices and volume of the stock itself
    df_stk_tmp=get_price(ticker,fromdate,todate)
    if df_stk_tmp is None or len(df_stk_tmp)==0:
        print(" #Error(get_money_flowin): stock prices unavailable for",ticker)
        return None
    df_stk=df_stk_tmp[['Open','High','Low','Adj Close']].copy()
    df_stk['Volume_ln']=df_stk_tmp['Volume'].apply(_volume_log)

    # Join on the date index; the money-flow rows decide which dates are kept
    df_stk_mkt=pd.merge(df_stk,df_dp,how='left',left_index=True,right_index=True)
    dfp_tmp=pd.merge(df,df_stk_mkt,how='left',left_index=True,right_index=True)

    newColList=['date','Close','Change%','Adj Close','Open','High','Low','Volume_ln', \
                'netFlowInRatio%_main','netFlowInRatio%_small','netFlowInRatio%_mid', \
                'netFlowInRatio%_big','netFlowInRatio%_super', \
                'netFlowInAmount_main_ln','netFlowInAmount_small_ln','netFlowInAmount_mid_ln', \
                'netFlowInAmount_big_ln','netFlowInAmount_super_ln', 'netFlowInAmount_ln', \
                'Close_mkt','Volume_mkt']
    dfp=dfp_tmp[newColList]

    if end=='latest':
        return dfp

    jieguo,end2=check_date2(end)
    if not jieguo:
        # unusable end date: fall back to the full history
        return dfp

    # NOTE(review): compares the 'date' column with check_date2's result directly;
    # presumably both are comparable date values - confirm against akshare's output
    return dfp[dfp['date'] <= end2]
153
+
154
+
155
+ #==============================================================================
156
+ # 对特征数据进行预处理
157
+ #==============================================================================
158
+
159
def preprocess(X,preproctype='nop'):
    """
    Return a rescaled copy of the feature matrix X; X itself is not modified.

    Parameters:
        X: DataFrame of features (label data is not handled here).
        preproctype: one of
            'nop'     - no scaling (default);
            '0-1'     - (x - min) / (max - min) per column;
            'min-max' - (x - mean) / std per column;
            'log'     - signed logarithm of every value, via slog.
            The option names are kept as they are for compatibility with
            existing callers.

    Returns:
        The scaled DataFrame, or None if preproctype is not supported.
        A constant column (zero range / zero std) is mapped to zeros instead
        of being divided by zero.
    """
    typelist=['0-1','min-max','log','nop']
    if preproctype not in typelist:
        print(' #Error(preprocess): not supported for preprocess type',preproctype)
        print(' Supported preprocess types:',typelist)
        return None

    scaler_X=X.copy()
    if preproctype == 'nop':
        return scaler_X

    for c in list(X):
        col=scaler_X[c]

        if preproctype == '0-1':
            value_min=col.min()
            value_range=col.max()-value_min
            centered=col-value_min
            # zero range: every value equals the minimum, so centered is already all zeros
            scaler_X[c]=centered if value_range == 0 else centered/value_range

        elif preproctype == 'min-max':
            value_std=col.std()
            centered=col-col.mean()
            scaler_X[c]=centered if value_std == 0 else centered/value_std

        else:   # 'log'
            scaler_X[c]=col.apply(slog)

    return scaler_X
198
+
199
def slog(x):
    '''
    Signed natural logarithm of a scalar.

    Positive values return log(x), negative values return -log(-x), zero
    returns 0 and NaN returns NaN.
    '''
    import numpy as np
    # NaN never compares equal to anything (not even itself), so it has to be
    # detected with isnan rather than ==
    if np.isnan(x): return np.nan
    if x == 0: return 0
    if x > 0: return np.log(x)
    return -np.log(-x)
208
+
209
# Interactive scratch cell: sample call of preprocess. It only runs when the
# module is executed directly. X is not defined in this cell and must already
# exist in the interactive session (e.g. produced by make_sample).
if __name__=='__main__':
    scaler_X=preprocess(X,preproctype='0-1')
211
+
212
+ #==============================================================================
213
+ # 构造适合机器学习的样本
214
+ #==============================================================================
215
# Interactive scratch cell: builds a sample set for one stock. It only runs when
# the module is executed directly, never on import.
if __name__=='__main__':
    # Same values as the defaults of make_sample
    ndays=1
    preCumTimes=5

    dfp=get_money_flowin('600519.SS')

    X,ydf,X_new=make_sample(dfp,ndays=1)
222
+
223
def make_sample(dfp,ndays=1,preCumTimes=5):
    """
    Build machine-learning samples from the frame produced by get_money_flowin.

    Parameters:
        dfp: DataFrame with a 'date' column, 'Close', 'Change%', 'Close_mkt',
            'Volume_mkt' and any number of columns whose names contain
            'Amount' or 'Ratio'. It is not modified.
        ndays: window (in rows) of the rolling mean applied to every feature.
        preCumTimes: the look-back window for the cumulative / average
            features is preCumTimes * ndays rows.

    Returns:
        (X, ydf, X_new):
            X     - feature rows that have a known next-row outcome;
            ydf   - 'Close_next' and 'Change%_next' for the rows of X;
            X_new - features of the most recent row, to predict from.
        (None, None, None) if no row survives the removal of missing values.
    """
    preDays=ndays * preCumTimes

    # Work on a copy: adding the derived columns to the caller's frame would
    # make a second call derive features from already derived columns
    work=dfp.copy()
    baseCols=list(work)

    # Cumulative net inflow amounts over the look-back window
    for c in [c for c in baseCols if 'Amount' in c]:
        work[c+'_cum']=work[c].rolling(window=preDays,min_periods=1).sum()

    # Average net inflow ratios over the look-back window
    for c in [c for c in baseCols if 'Ratio' in c]:
        work[c+'_avg']=work[c].rolling(window=preDays,min_periods=1).mean()

    # Mean and standard deviation of the market index over the look-back window
    for c in ['Close_mkt','Volume_mkt']:
        work[c+'_avg']=work[c].rolling(window=preDays,min_periods=1).mean()
        work[c+'_std']=work[c].rolling(window=preDays,min_periods=1).std()

    # Important, position sensitive: drop the incomplete rows (the first row
    # has no standard deviation) before the features are rolled
    work=work.dropna()
    if len(work)==0:
        print(" #Error(make_sample): dropna caused empty dataframe, process stopped")
        # same arity as the normal return, so callers can always unpack three values
        return None,None,None

    # Rolling mean of every feature over ndays rows
    featCols=[c for c in list(work) if c != 'date']
    sample=work[featCols].rolling(window=ndays,min_periods=1).mean().add_suffix('_roll')
    sample['Close']=work['Close']
    sample['Change%']=work['Change%']

    # Outcome of the following row.
    # NOTE(review): the shift is one row regardless of ndays - confirm this is intended.
    sample['Close_next']=sample['Close'].shift(-1)
    sample['Change%_next']=sample['Change%'].shift(-1)
    ylist=['Close_next','Change%_next']
    XCollist=[c for c in list(sample) if c not in ylist]

    # Training data: only rows whose outcome is known
    complete=sample.dropna()
    X=complete[XCollist].copy()
    ydf=complete[ylist].copy()

    # The latest row has no outcome yet; it is what gets predicted
    X_new=sample.tail(1)[XCollist]

    return X,ydf,X_new
297
+
298
+ #==============================================================================
299
+ # 训练模型,获得最优模型参数,进行预测
300
+ #==============================================================================
301
# Interactive scratch cell: the same values as the defaults of train_predict_knn,
# set as module globals so the function body can be stepped through by hand.
# It only runs when the module is executed directly.
if __name__=='__main__':
    noday=1
    y='Close'
    diff=0.03
    min_score=0.6
    votes=100
    max_neighbours=10
    max_RS=10
    printout=True
310
+
311
def train_predict_knn(X,ydf,noday=1,y='Close', \
                      diff=0.03,min_score=0.6,votes=100,max_neighbours=10,max_RS=10,printout=True):
    """
    Search k-nearest-neighbour models for the best parameters and collect
    their predictions. Marked as unused and unfinished by its author.

    Parameters:
        X: feature matrix; its last noday rows are held back from training.
        ydf: label frame; the label column read is y+'_next'+str(noday).
            NOTE(review): make_sample in this module names its label columns
            'Close_next' / 'Change%_next' (no numeric suffix) and creates no
            'Direction' label - confirm where ydf is meant to come from.
        noday: which future day to predict.
        y: 'Close' or 'Change%' (regression) or 'Direction' (classification).
        diff: maximum accepted gap between training and test score.
        min_score: minimum accepted training and test score.
        votes: maximum number of models kept.
        max_neighbours: largest number of neighbours tried.
        max_RS: largest random seed tried for the train/test split.
        printout: currently not used.

    Returns:
        A DataFrame with one row per retained model (best test score first),
        or None if y is not supported.
    """
    ylist=['Close','Change%','Direction']
    if y not in ylist:
        print(" #Error(train_predict_knn):",y,"not within",ylist)
        return None

    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    # 'Direction' is a class label; prices and changes are continuous
    if y == 'Direction':
        from sklearn.neighbors import KNeighborsClassifier as KnnModel
    else:
        from sklearn.neighbors import KNeighborsRegressor as KnnModel

    # Hold back the rows whose outcome is not known yet
    XX=X[: -noday]
    if noday == 1:
        X_new=np.array(X[-1:])
    else:
        X_new=np.array(X[-noday:-noday+1])

    yy=ydf[: -noday][y+'_next'+str(noday)]

    # Parameter space
    nlist=list(range(1,max_neighbours+1))
    n_num=len(nlist)
    wlist=['uniform','distance']
    mlist1=['braycurtis','canberra','correlation','dice','hamming','jaccard']
    mlist2=['kulsinski','matching','rogerstanimoto','russellrao']
    mlist3=['sokalmichener','sokalsneath','sqeuclidean','yule','chebyshev']
    mlist4=['cityblock','euclidean','minkowski','cosine']
    mlist=mlist1+mlist2+mlist3+mlist4
    rslist=list(range(0,max_RS+1))

    # A split depends only on the seed, so compute each one once
    splits=[(rs,)+tuple(train_test_split(XX,yy,random_state=rs)) for rs in rslist]

    rows=[]
    last_error=None
    print('\n Searching for best parameters of knn model in',noday,'trading days ...')
    print(' Progress: 0%, ',end='')
    for n in nlist:
        for w in wlist:
            for m in mlist:
                for rs,X_train,X_test,y_train,y_test in splits:
                    knn1=KnnModel(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
                    try:
                        knn1.fit(X_train, y_train)
                        train_score=round(knn1.score(X_train, y_train),3)
                        test_score=round(knn1.score(X_test, y_test),3)
                        prediction=knn1.predict(X_new)[0]
                    except ValueError as e:
                        # e.g. a metric the installed scikit-learn does not
                        # provide: skip this combination, keep searching
                        last_error=e
                        continue

                    rows.append({'spread':abs(round(train_score-test_score,3)), \
                                 'train_score':train_score,'test_score':test_score, \
                                 'neighbours':n,'weight':w,'metric':m, \
                                 'random':rs,'pred':prediction})

        print(int(n/n_num*100),'\b%, ',end='')
    print('done.')

    if not rows and last_error is not None:
        # nothing could be evaluated at all: surface the underlying problem
        raise last_error

    results=pd.DataFrame(rows,columns=['spread','train_score','test_score', \
                                       'neighbours','weight','metric','random','pred'])

    # Drop badly over-fitted models
    r0=results[results['train_score'] < 1]
    # Drop models whose training or test score is too low
    r0=r0[r0['train_score'] > min_score]
    r0=r0[r0['test_score'] > min_score]
    # Drop models that generalise poorly
    r0=r0[r0['spread'] < diff]

    # Highest test score first, smallest train/test gap as tie-breaker
    r2=r0.sort_values(by=['test_score','spread'],ascending=[False,True])

    return r2.head(min(votes,len(r2)))
404
+
405
+ #==============================================================================
406
+ # 训练,获得最优模型参数
407
+ #==============================================================================
408
# Interactive scratch cell: the same values as the defaults of training_knn_clf,
# set as module globals so the function body can be stepped through by hand.
# It only runs when the module is executed directly.
if __name__=='__main__':
    ndays=1
    max_neighbors=10
    max_p=6
    cv=5
    rs=0
414
+
415
def training_knn_clf(scaler_X,ydf,ndays=1,max_neighbors=10,max_p=6,cv=5,rs=0):
    '''
    Grid-search a k-nearest-neighbour classifier that predicts whether the
    price change ndays later is 'Higher' or 'Lower'. Marked as unused by its author.

    Parameters:
        scaler_X: feature matrix, one row per row of ydf.
        ydf: frame with a 'Change%' column; it is not modified.
        ndays: how many rows ahead the label is taken from.
        max_neighbors: largest n_neighbors tried (1..max_neighbors).
        max_p: exclusive upper bound of p for the distance-weighted grid
            (1..max_p-1).
        cv: number of cross-validation folds.
        rs: random seed of the train/test split.

    Returns:
        (best_params, train_score, test_score) of the best estimator; the
        scores are accuracies on the training and the held-out part.
    '''
    # Label = direction of the change ndays later. The last ndays rows have no
    # future value yet, so they are left out rather than labelled 'Lower'.
    next_change=ydf['Change%'].shift(-ndays)
    labelled=next_change.notna().to_numpy()
    y=next_change[labelled].apply(lambda x: 'Higher' if x>0 else 'Lower')
    X=scaler_X[labelled]

    # Split into training and test sets
    from sklearn.model_selection import train_test_split
    X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=rs)

    # Grid-search space
    neighbours=list(range(1,max_neighbors+1))
    param_grid = [
        {   # unweighted distance: p is not searched here
            'weights': ['uniform'],
            'n_neighbors': neighbours
        },
        {   # distance-weighted
            'weights': ['distance'],
            'n_neighbors': neighbours,
            'p': list(range(1,max_p))
        } ]

    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier()    # default parameters; the grid search sets them

    # Cross-validated accuracy decides the best parameter combination
    from sklearn.model_selection import GridSearchCV
    grid_search = GridSearchCV(knn, param_grid,scoring='accuracy',cv=cv)

    # Fit on the training part only, so test_score below is a real held-out score
    grid_search.fit(X_train, y_train)
    best_knn=grid_search.best_estimator_
    train_score=best_knn.score(X_train, y_train)
    test_score=best_knn.score(X_test, y_test)

    best_params=grid_search.best_params_
    return best_params,train_score,test_score
466
+
467
+
468
+ #==============================================================================
469
+ # Forecasting stock price directions by money flow in/out, using knn
470
+ #==============================================================================
471
# Interactive scratch cell: end-to-end sample run of price_direction_knn.
# It only runs when the module is executed directly, never on import.
if __name__=='__main__':
    ticker='600519.SS'
    ticker='000723.SZ'  # overrides the line above; only this ticker is used when the cell runs as a whole

    # Same values as the defaults of price_direction_knn
    ndays=1
    min_score=0.9
    diff=0.01
    votes=100
    max_neighbours=10
    max_RS=10
    printout=True

    df=get_money_flowin(ticker,end='2023-6-15')
    decision,known_close,known_day=price_direction_knn(ticker,df,ndays=1)
485
+
486
def price_direction_knn(ticker,df,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=10,printout=True):
    """
    Predict whether a stock will close higher or lower, based on its money
    flow, by letting many k-nearest-neighbour classifiers vote.

    Parameters:
        ticker: stock code; only used to look up the name that is printed.
        df: money-flow frame as returned by get_money_flowin.
        ndays: how many days ahead to predict, default 1.
        diff: maximum accepted gap between training and test score
            (smaller means better generalisation).
        min_score: minimum accepted training and test score.
        votes: maximum number of models allowed to vote.
        max_neighbours: largest number of neighbours tried.
        max_RS: largest random seed tried for the train/test split.
        printout: whether to print the voting result and the prediction.

    Returns:
        (decision, known_close, known_day) where decision is one of
        '+', '2+', '3+' (higher; the prefix is how many times the "higher"
        votes outnumber the "lower" ones), '-', '2-', '3-' (lower), '=' (tie)
        or '~' (no prediction: target day is a weekend, or no usable sample);
        known_close / known_day are the latest known close and its date.
    """
    import pandas as pd

    known_day=df.tail(1)['date'].values[0]
    known_close=df.tail(1)['Close'].values[0]

    # No trading, hence no prediction, when the target day is a weekend
    tdate=date_adjust(known_day, adjust=ndays)
    if pd.to_datetime(tdate).weekday() in [5,6]:
        print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
        return '~',known_close,known_day

    # Build the samples: labels ydf, feature matrix X, row to predict X_new
    sample=make_sample(df,ndays=ndays)
    if sample is None or sample[0] is None:
        # make_sample already reported why; there is nothing to learn from
        return '~',known_close,known_day
    X,ydf,X_new=sample

    known_day=pd.to_datetime(X_new.index.values[0]).strftime("%Y-%m-%d")
    known_close=X_new['Close'].values[0]

    # Class label: 1 = next change positive, -1 = negative, 0 = unchanged
    y1=ydf['Change%_next'].apply(lambda x: 1 if x>0 else -1 if x <0 else 0)

    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    # Parameter space
    nlist=list(range(1,max_neighbours+1))
    n_num=len(nlist)
    wlist=['uniform','distance']
    mlist1=['braycurtis','canberra','correlation','dice','hamming','jaccard']
    mlist2=['kulsinski','matching','rogerstanimoto','russellrao']
    mlist3=['sokalmichener','sokalsneath','sqeuclidean','yule','chebyshev']
    mlist4=['cityblock','euclidean','minkowski','cosine']
    mlist=mlist1+mlist2+mlist3+mlist4
    rslist=list(range(0,max_RS+1))

    # A split depends only on the seed, so compute each one once
    splits=[(rs,)+tuple(train_test_split(X,y1,random_state=rs)) for rs in rslist]

    rows=[]
    last_error=None
    print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
    print(' Progress: 0% ',end='')
    for n in nlist:
        for w in wlist:
            for m in mlist:
                for rs,X_train,X_test,y_train,y_test in splits:
                    knn1=KNeighborsClassifier(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
                    try:
                        knn1.fit(X_train, y_train)
                        train_score=round(knn1.score(X_train, y_train),3)
                        test_score=round(knn1.score(X_test, y_test),3)
                        prediction=knn1.predict(X_new)[0]
                    except ValueError as e:
                        # e.g. a metric the installed scikit-learn does not
                        # provide: skip this combination, keep searching
                        last_error=e
                        continue

                    rows.append({'spread':abs(round(train_score-test_score,3)), \
                                 'train_score':train_score,'test_score':test_score, \
                                 'neighbours':n,'weight':w,'metric':m, \
                                 'random':rs,'pred':prediction})

        print(int(n/n_num*100),'\b% ',end='')
    print('done.')

    if not rows and last_error is not None:
        # nothing could be evaluated at all: surface the underlying problem
        raise last_error

    results=pd.DataFrame(rows,columns=['spread','train_score','test_score', \
                                       'neighbours','weight','metric','random','pred'])

    # Keep models whose training and test scores are both good enough
    r0=results[(results['train_score']>=min_score) & (results['test_score']>=min_score)]
    if len(r0)==0:
        # min_score was too demanding: relax it to the best level actually reached
        min_score_new=min(results['train_score'].max(),results['test_score'].max())
        r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]

        if len(r0)==0:
            # Still nothing: take the best-generalising 1% ...
            spread_quantile=results['spread'].quantile(q=0.01)
            r0_spread=results[(results['spread']<spread_quantile)]
            if len(r0_spread)==0:
                r0_spread=results[(results['spread']<=spread_quantile)]

            # ... and among them those with the top test scores
            test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
            r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
            if len(r0)==0:
                r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]

            min_score_new=min(r0['train_score'].max(),r0['test_score'].max())

        print(" #Warning: filtering parameter min_score has been adjusted to",min_score_new)

    # Drop models that generalise poorly
    r1=r0[r0['spread'] <= diff]
    if len(r1)==0:
        diff_new=r0['spread'].min()
        r1=r0[r0['spread'] <= diff_new]
        print(" #Warning: filtering parameter diff has been adjusted to",diff_new)

    # Voters: the best-generalising models first (smallest spread) ...
    votes=min(votes,len(r1))
    r2=r1.sort_values(by=['spread','test_score'],ascending=[True,False]).head(votes)
    # ... listed by test score
    r3=r2.sort_values(by=['test_score','spread'],ascending=[False,True])

    # Count the votes; predictions of "unchanged" (0) do not vote
    zhang=len(r3[r3['pred']==1])
    die=len(r3[r3['pred']==-1])

    decision='+'
    decision_text='HIGHER'
    if die != 0:
        if zhang >= die * 2.0: decision='2+'
        if zhang >= die * 3.0: decision='3+'

    if die > zhang:
        decision='-'
        decision_text='LOWER'
        if zhang != 0:
            if die >= zhang * 2.0: decision='2-'
            if die >= zhang * 3.0: decision='3-'

    if zhang==die:
        decision='='
        decision_text='FLAT'

    if not printout: return decision,known_close,known_day

    print(" Model voting for stock price after "+str(ndays)+" trading days: Higher("+str(zhang)+'), Lower('+str(die)+')')
    print(" "+ticker_name(ticker,'stock')+': previously closed '+str(known_close)+' @ '+str(known_day))

    ndate=date_adjust(known_day,adjust=ndays)
    print(" Prediction to close at",decision_text,"price trend ("+decision+') @',ndate)

    return decision,known_close,known_day
685
+
686
+
687
+ #==============================================================================
688
if __name__=='__main__':
    # Manual smoke test for forecast_direction_knn: sample arguments first,
    # then a 1-day and a 3-day forecast for the same ticker.
    ticker='600519.SS'
    ndays,diff,min_score=1,0.01,0.9
    votes,max_neighbours,max_RS=100,3,20
    preproctype='0-1'

    df=forecast_direction_knn(ticker,ndays=1)
    df=forecast_direction_knn(ticker,ndays=3)
700
+
701
def forecast_direction_knn(ticker,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=20,preproctype='0-1',end='latest'):
    """
    Forecast whether a stock's close price will go up or down on each of the
    next `ndays` days, based on the stock's money-flow data.

    ticker: stock code.
    ndays: forecast horizon; one forecast is produced for every day 1..ndays.
    diff: generalisation gap allowed between train and test scores, smaller is better.
    min_score: minimum train/test score.
    votes: number of models taking part in the soft vote, 100 by default.
    max_neighbours: largest number of neighbours tried, 10 by default.
    max_RS: largest random seed tried, 20 by default.
    preproctype: accepted for compatibility; not used inside this function.
    end: last date of the money-flow data, passed to get_money_flowin.

    Returns (decisionlist, known_close, known_day); the last two come from the
    final price_direction_knn call.
    """
    print("\nStart forecasting, it may take great time, please wait ...")

    # Money-flow data of the stock, fetched once and shared by all horizons.
    flow=get_money_flowin(ticker,end=end)

    # One direction forecast per horizon.
    horizons=range(1,ndays+1)
    decisionlist=[]
    for nd in horizons:
        decision,known_close,known_day=price_direction_knn(
            ticker,flow,ndays=nd,
            diff=diff,
            min_score=min_score,
            votes=votes,
            max_neighbours=max_neighbours,
            max_RS=max_RS)
        decisionlist.append(decision)

    print("\nStock information:",ticker_name(ticker,'stock'),'\b, closed',known_close,'@',known_day)

    # (marker looked for in the decision, text printed, whether the decision is echoed)
    outcomes=(('-',"LOWER,",True),
              ('+',"HIGHER,",True),
              ('~',"market closed",False),
              ('=',"least different",False))
    for nd,pred in zip(horizons,decisionlist):
        ndate=date_adjust(known_day, adjust=nd)
        print("Forecasting stock close price trend on",ndate,end=': ')
        for marker,text,echo in outcomes:
            if marker not in pred:
                continue
            if echo:
                print(text,pred)
            else:
                print(text)

    return decisionlist,known_close,known_day
744
+
745
+
746
+ #==============================================================================
747
+ # Forecasting stock prices by money flow in/out, using knn
748
+ #==============================================================================
749
+
750
if __name__=='__main__':
    # Manual smoke test for price_price_knn on a fixed data window.
    ticker='605011.SS'      # previously tried: '600519.SS'

    ndays,diff,min_score=1,0.01,0.8
    votes,max_neighbours,max_RS=100,10,10
    printout=True

    df=get_money_flowin(ticker,end='2023-6-15')
    pred_result=price_price_knn(ticker,df,ndays=1)
    pred_result=price_price_knn(ticker,df,ndays=3)
765
+
766
+ #def price_price_knn(ticker,df,ndays=1,diff=0.01,min_score=0.8,votes=100,max_neighbours=10,max_RS=20,printout=True):
767
def price_price_knn(ticker,df,ndays=1,max_neighbours=10,max_RS=20,printout=True):
    """
    Predict a stock's close price `ndays` days ahead from its money-flow data.

    Every combination of neighbour count, weighting scheme, distance metric and
    train/test split seed is fitted as a k-nearest-neighbour regressor; the
    predictions of the best-scoring, smallest-spread models are averaged.

    ticker: stock code, used here only to print the stock name.
    df: money-flow data of the stock; the 'date' and 'Close' columns are read
        here and the frame is handed to make_sample.
    ndays: how many days ahead to predict, 1 by default.
    max_neighbours: largest n_neighbors tried (1..max_neighbours).
    max_RS: largest random_state tried for the train/test split (0..max_RS).
    printout: when False, return right after the computation without printing
        the prediction summary.

    Returns (decision, decision_score, known_close, known_day):
        decision: mean predicted price rounded to 2 decimals; '?' when no usable
            prediction was obtained; 0 when the target date is a weekend.
        decision_score: mean test score of the selected models rounded to 3
            decimals (NaN when nothing was selected, 0 on a weekend).
        known_close, known_day: latest known close price and its date.
    """
    import numpy as np
    import pandas as pd

    # No forecast is made when the target date falls on a weekend.
    known_day=df.tail(1)['date'].values[0]
    tdate=date_adjust(known_day, adjust=ndays)
    if pd.to_datetime(tdate).weekday() in [5,6]:
        print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
        known_close=df.tail(1)['Close'].values[0]
        return 0,0,known_close,known_day

    # Build the sample: feature matrix X, label frame ydf, row to predict X_new.
    X,ydf,X_new=make_sample(df,ndays=ndays)

    known_day=pd.to_datetime(X_new.index.values[0]).strftime("%Y-%m-%d")
    known_close=X_new['Close'].values[0]

    # Regression target.
    y3=ydf['Close_next']

    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsRegressor

    # Parameter grid.
    nlist=list(range(1,max_neighbours+1))
    n_num=len(nlist)
    wlist=['uniform','distance']
    mlist=['braycurtis','canberra','correlation','dice','hamming','jaccard',
           'kulsinski','matching','rogerstanimoto','russellrao',
           'sokalmichener','sokalsneath','sqeuclidean','chebyshev',
           'cityblock','euclidean','minkowski','cosine']

    # The split depends only on the seed, so compute each one once instead of
    # once per (neighbours, weight, metric) combination.
    splits=[]
    for rs in range(0,max_RS+1):
        try:
            splits.append((rs,train_test_split(X,y3,random_state=rs)))
        except Exception:
            continue

    # Collect plain records and build the frame once at the end; appending to a
    # DataFrame row by row copies the whole frame on every iteration.
    rows=[]
    print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
    print(' Progress: 0% ',end='')
    for n in nlist:
        for w in wlist:
            for m in mlist:
                for rs,(X_train,X_test,y_train,y_test) in splits:
                    try:
                        knn1=KNeighborsRegressor(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
                        knn1.fit(X_train, y_train)
                        train_score=round(knn1.score(X_train, y_train),3)
                        test_score=round(knn1.score(X_test, y_test),3)
                        prediction=knn1.predict(X_new)[0]
                    except Exception:
                        # Combinations that cannot be fitted or scored are skipped.
                        # Exception (not a bare except) keeps Ctrl-C working
                        # during this long search.
                        continue

                    rows.append({'spread':abs(round(train_score-test_score,3)),
                                 'train_score':train_score,
                                 'test_score':test_score,
                                 'neighbours':n,'weight':w,'metric':m,
                                 'random':rs,'pred':prediction})

        print(int(n/n_num*100),'\b% ',end='')
    print('.')

    results=pd.DataFrame(rows,columns=['spread','train_score','test_score',
                                       'neighbours','weight','metric','random','pred'])

    if results.empty:
        # Every combination failed: there is nothing to average.
        decision,decision_score='?',float('nan')
    else:
        # Keep the models whose train AND test scores both reach the smaller of
        # the two best scores.
        min_score_new=min(results['train_score'].max(),results['test_score'].max())
        r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]

        if len(r0)==0:
            # No single model is best on both: fall back to the lowest-spread
            # models and, among those, the ones with the highest test score.
            spread_quantile=results['spread'].quantile(q=0.01)
            r0_spread=results[results['spread']<spread_quantile]
            if len(r0_spread)==0:
                r0_spread=results[results['spread']<=spread_quantile]

            test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
            r0=r0_spread[r0_spread['test_score']>test_score_quantile]
            if len(r0)==0:
                r0=r0_spread[r0_spread['test_score']>=test_score_quantile]

        # Among the survivors keep only those with the smallest spread.
        r1=r0[r0['spread']==r0['spread'].min()]

        # Average price and average test score of the selected models.
        decision=round(r1['pred'].mean(),2)
        decision_score=round(r1['test_score'].mean(),3)
        if np.isnan(decision): decision='?'

    # Return the values computed in this function; the former names
    # today_close/stoday are not defined here.
    if not printout: return decision,decision_score,known_close,known_day

    print(" Model poll for stock price after "+str(ndays)+" trading days:",decision)
    print(" Last close price: "+ticker_name(ticker,'stock')+', closed '+str(known_close)+', '+str(known_day))

    ndate=date_adjust(known_day,adjust=ndays)

    print(" Prediction for stock close price on",ndate,"\b:",decision)

    return decision,decision_score,known_close,known_day
962
+
963
+
964
+ #==============================================================================
965
if __name__=='__main__':
    # Manual smoke test for forecast_price_knn with a small search space.
    ticker='600519.SS'
    ndays,max_neighbours,max_RS=1,3,2

    forecast=forecast_price_knn(ticker,ndays=1,end='2023-6-15')
    forecast=forecast_price_knn(ticker,ndays=3,end='2023-6-15')
973
+
974
+ #def forecast_price_knn(ticker,ndays=1,diff=0.03,min_score=0.7,votes=100,max_neighbours=10,max_RS=10,end='latest'):
975
def forecast_price_knn(ticker,ndays=1,max_neighbours=10,max_RS=20,end='latest'):
    """
    Forecast a stock's close price for each of the next `ndays` days from its
    money-flow data, and print the forecasts together with a trend comment.

    ticker: stock code.
    ndays: forecast horizon; one forecast is produced for every day 1..ndays.
    max_neighbours: largest number of neighbours tried, 10 by default.
    max_RS: largest random seed tried; larger is better.
    end: last date of the money-flow data, passed to get_money_flowin.

    Returns (decisionlist, confidencelist): the predicted price and the
    confidence (mean test score) for every horizon, in order.

    Note: the results are often quite unreliable!
    """
    print("\nStart forecasting, it may take great time, please wait ...")

    # Money-flow data of the stock, fetched once and shared by all horizons.
    df=get_money_flowin(ticker,end=end)

    # One price forecast per horizon.
    decisionlist=[]
    confidencelist=[]
    for nd in range(1,ndays+1):
        decision,confidence,known_close,known_day=price_price_knn(ticker,df,ndays=nd,
                                                                   max_neighbours=max_neighbours,
                                                                   max_RS=max_RS)
        decisionlist.append(decision)
        confidencelist.append(confidence)

    if not decisionlist:
        # ndays < 1: no forecast was made, so there is no known close to report.
        print(" #Warning(forecast_price_knn): ndays must be at least 1, got",ndays)
        return decisionlist,confidencelist

    print("\nStock information:",ticker_name(ticker,'stock'),'\b, closed',known_close,'@',known_day)

    for nd,(pred,conf) in enumerate(zip(decisionlist,confidencelist),start=1):
        ndate=date_adjust(known_day, adjust=nd)

        # price_price_knn reports a weekend target as prediction 0 / confidence 0.
        if pred == 0 or conf == 0:
            print("Forecasting stock prices on",ndate,'\b: market closed')
            continue

        # '?' marks a day for which no usable prediction was obtained.
        if pred == '?':
            print("Forecasting stock prices on",ndate,'\b: uncertain')
            continue

        # Compare THIS day's prediction with the known close (not the value left
        # over from the last iteration of the forecasting loop).
        change=pred - known_close
        if abs(change) < 0.05:
            comment="MINOR difference with previous one"
        elif change > 0:
            comment="HIGHER than previous one"
        else:
            comment="LOWER than previous one"

        print("Forecasting stock prices on",ndate,'\b:',pred,'('+str(round(conf*100,1))+'% confident)')
        print("Forecasting stock prices trend on",ndate,'\b:',comment)

    return decisionlist,confidencelist
1037
+
1038
+
1039
+ #==============================================================================
1040
+ #==============================================================================
1041
+ #==============================================================================
1042
if __name__=='__main__':
    # Manual smoke test for print_sentence with each separator in turn.
    mid_symbol=['。',';']   # previously tried: [';','。']

    longtext="姓名;年龄;职业;职称。"
    print_sentence(longtext,mid_symbol=';')

    longtext="姓名。年龄。职业。职称。"
    print_sentence(longtext,mid_symbol='。')
1050
+
1051
def print_sentence(longtext,mid_symbol=[';','。']):
    """
    Print a long text one sentence per line.

    longtext: the text to split; anything that is not a string is reported
        with a warning and nothing else is printed.
    mid_symbol: candidate separators, either a list/tuple of strings or a
        string whose characters are the candidates. The first candidate is
        tried; if it leaves the text in a single piece and a second candidate
        exists, the second one is used instead. The default list is only read,
        never modified.

    Every non-empty piece is printed on its own line, with the separator in use
    appended unless the piece already ends with '。'. Returns None.
    """
    if not isinstance(longtext,str):
        print(" #Warning(print_sentence): no text to print")
        return

    # Only the first two candidates are ever consulted.
    symbols=list(mid_symbol[:2])
    if not symbols or not all(isinstance(c,str) and c for c in symbols):
        print(" #Error(print_sentence): invalid middle symbol",mid_symbol)
        return

    symbol=symbols[0]
    sentenceList=longtext.split(symbol)
    if '' in sentenceList:
        # Drop the first empty piece so that a text with one leading/trailing
        # separator still counts as "not split".
        sentenceList.remove('')

    # The first separator did not split the text: try the second one, if any.
    if len(sentenceList) == 1 and len(symbols) > 1:
        symbol=symbols[1]
        sentenceList=longtext.split(symbol)

    for s in sentenceList:
        if s == '':
            continue
        # split() removed the separator; put it back unless the piece already
        # ends with a full stop.
        print(s if s.endswith('。') else s+symbol)

    return
1086
+
1087
+ #==============================================================================
1088
if __name__ =="__main__":
    # Manual smoke test for stock_profile_china.
    ticker='01398.HK'       # previously tried: '000001.SZ', '600519.SS'
    prettytab,tabborder=True,True

    stock_profile_china(ticker,category='profile')
    stock_profile_china(ticker,category='profile',business_period='annual')
1098
+
1099
+ def stock_profile_china(ticker,category='profile', \
1100
+ business_period='recent', \
1101
+ financial_quarters=8, \
1102
+ start='2020-1-1', \
1103
+
1104
+ #参数prettytab和tabborder弃用,保留只为了兼容性
1105
+ prettytab=False, \
1106
+ tabborder=False, \
1107
+
1108
+ loc1='upper left',loc2='upper right', \
1109
+ facecolor='papayawhip',font_size='16px'):
1110
+ """
1111
+ 功能:介绍中国A股的主要信息,包括公司基本信息、主营信息、股东信息、财务信息、分红历史和市场估值等。
1112
+ ticker:A股股票代码
1113
+ category:信息类别,默认profile为基本信息,business为主营业务信息,shareholder为股东信息,
1114
+ financial为财务基本面,dividend为分红历史,valuation为市场估值信息。
1115
+
1116
+ business_period:配合category='business'使用,介绍主营业务使用的财报期间,
1117
+ 默认recent为最近一期(可能为季报、中报或年报),annual为使用最近的年报。
1118
+
1119
+ financial_quarters:配合category='financial'使用,介绍财务基本面使用的季度个数,最大为8.
1120
+ start:配合category='valuation'或'dividend'使用,介绍估值/分红信息的开始日期,默认为2020-1-1。
1121
+
1122
+ prettytab:输出表格样式,默认False使用markdown报表,True使用prettytable报表
1123
+ tabborder:prettytable报表时是否绘制边框,默认不绘制False,True绘制简单字符链接的边框,丑陋。
1124
+
1125
+ 返回值:无。
1126
+ 建议运行环境:Anaconda Jupyter Notebook,其他环境未测试。
1127
+ """
1128
+ DEBUG=False
1129
+
1130
+ #检查是否A股
1131
+ _,prefix,suffix=split_prefix_suffix(ticker)
1132
+ if not (suffix.upper() in ['SS','SZ','BJ']):
1133
+ print(" #Warning(stock_profile_china): not a stock in China for",ticker)
1134
+ return
1135
+
1136
+ if suffix.upper() in ['SS','SH']:
1137
+ sx='SH'
1138
+ elif suffix.upper() in ['SZ']:
1139
+ sx='SZ'
1140
+ else:
1141
+ sx='BJ'
1142
+ sxticker=sx+prefix
1143
+
1144
+ categorylist=['profile','business','shareholder','financial','dividend','valuation']
1145
+ if not (category in categorylist):
1146
+ print(" #Error(stock_detail_china): unsupported category",category)
1147
+ print(" Supported category:",categorylist)
1148
+ return
1149
+
1150
+ ticker1=ticker[:6]
1151
+ import akshare as ak
1152
+ from datetime import datetime
1153
+
1154
+ import datetime as dt
1155
+ stoday=str(dt.date.today())
1156
+
1157
+ yi=100000000.0
1158
+ yiyuan_name='(亿元)'
1159
+ yigu_name='(亿股)'
1160
+
1161
+ baiwan=1000000.0
1162
+ wan=10000.0
1163
+
1164
+ #确定表格字体大小
1165
+ titile_font_size=font_size
1166
+ heading_font_size=data_font_size=str(int(font_size.replace('px',''))-1)+'px'
1167
+ heading_font_size_small=data_font_size_small=str(int(font_size.replace('px',''))-3)+'px'
1168
+
1169
+ import akshare as ak
1170
+ # 个股基本信息======================================================================================
1171
+ if any(s in category for s in ['profile','basic']): #判断category中是否包含任意子串
1172
+
1173
+ # 个股基本信息查询1=============================================================================
1174
+ try:
1175
+ df6=ak.stock_profile_cninfo(symbol=ticker1)
1176
+ except:
1177
+ print(" #Warning(stock_profile_china): profile info not found or inaccessible for",ticker1)
1178
+ return
1179
+
1180
+ # 整理信息
1181
+ dftmp=df6.copy(deep=True)
1182
+ delColList=['入选指数','办公地址','主营业务','经营范围','机构简介']
1183
+ dftmp.drop(delColList,axis=1,inplace=True)
1184
+
1185
+ dftmp['注册资金(亿元)']=int(dftmp['注册资金'] / wan)
1186
+ newColList=['公司名称','英文名称','曾用简称','A股代码','A股简称','B股代码','B股简称','H股代码','H股简称', \
1187
+ '所属市场','所属行业','法人代表','注册资金(亿元)','成立日期','上市日期','官方网站','电子邮箱', \
1188
+ '联系电话','传真','注册地址','邮政编码']
1189
+ dftmp1=dftmp[newColList]
1190
+
1191
+ dftmp2=dftmp1.T
1192
+ dftmp2.dropna(inplace=True)
1193
+ dftmp2['项目']=dftmp2.index
1194
+ dftmp2['内容']=dftmp2[0]
1195
+ dftmp3=dftmp2[['项目','内容']]
1196
+
1197
+ dftmp3.reset_index(drop=True,inplace=True)
1198
+
1199
+ # 个股基本信息查询2=============================================================================
1200
+ try:
1201
+ df1=ak.stock_individual_info_em(symbol=ticker1)
1202
+ except:
1203
+ print(" #Warning(stock_profile_china): invalid code for",ticker)
1204
+ return
1205
+
1206
+ # 整理信息
1207
+ dftmpb=df1.copy(deep=True)
1208
+ for i in range(0, len(dftmpb)):
1209
+ item=dftmpb.iloc[i]['item'].strip()
1210
+ value=dftmpb.iloc[i]['value']
1211
+ #print(item,value)
1212
+
1213
+ if item in ["总市值","流通市值"]:
1214
+ dftmpb.iloc[i]['value']=round(value / yi,4)
1215
+ dftmpb.iloc[i]['item']=item+yiyuan_name
1216
+
1217
+ if item in ["总股本","流通股"]:
1218
+ dftmpb.iloc[i]['value']=round(value / yi,4)
1219
+ dftmpb.iloc[i]['item']=item+yigu_name
1220
+
1221
+ if item in ["上市时间"]:
1222
+ dtdate=datetime.strptime(str(value),'%Y%m%d')
1223
+ dftmpb.iloc[i]['value']=dtdate.strftime('%Y-%m-%d')
1224
+ dftmpb.rename(columns={'item':'项目','value':'内容'},inplace=True)
1225
+
1226
+ #合并
1227
+ import pandas as pd
1228
+ dftmp12=pd.concat([dftmp3,dftmpb])
1229
+ dftmp12.reset_index(drop=True,inplace=True)
1230
+ dftmp12.set_index('项目',inplace=True)
1231
+
1232
+ dftmp13=dftmp12.T
1233
+ try:
1234
+ newCols=['股票代码','股票简称','曾用简称','所属市场','所属行业', \
1235
+ '上市日期','流通股(亿股)','流通市值(亿元)','总股本(亿股)','总市值(亿元)', \
1236
+ '公司名称','英文名称','成立日期','注册资金(亿元)','法人代表', \
1237
+ '注册地址','邮政编码','联系电话','传真','官方网站','电子邮箱']
1238
+ dftmp14=dftmp13[newCols]
1239
+ except:
1240
+ newCols=['股票代码','股票简称','所属市场','所属行业', \
1241
+ '上市日期','流通股(亿股)','流通市值(亿元)','总股本(亿股)','总市值(亿元)', \
1242
+ '公司名称','英文名称','成立日期','注册资金(亿元)','法人代表', \
1243
+ '注册地址','邮政编码','联系电话','传真','官方网站','电子邮箱']
1244
+ dftmp14=dftmp13[newCols]
1245
+
1246
+ dftmp15=dftmp14.T
1247
+ dftmp15.reset_index(inplace=True)
1248
+
1249
+ titletxt=ticker_name(ticker,'stock')
1250
+ """
1251
+ if prettytab:
1252
+ pandas2prettytable(dftmp15,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='l',tabborder=tabborder)
1253
+ print(' ','数据来源:巨潮资讯,',str(today))
1254
+ else:
1255
+ print('\n*** '+titletxt+'\n')
1256
+ print(dftmp15.to_markdown(tablefmt='Simple',index=False,colalign=['left']))
1257
+ print('\n数据来源:巨潮资讯,',str(today))
1258
+ """
1259
+ titletxt1=titletxt+":基本信息"
1260
+ footnote='数据来源:巨潮资讯,'+str(stoday)
1261
+ df_display_CSS(df=dftmp15,titletxt=titletxt1,footnote=footnote, \
1262
+ facecolor=facecolor,decimals=2,last_col_align='left', \
1263
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1264
+ data_font_size=data_font_size)
1265
+
1266
+ print(' ')
1267
+ print("*****",titletxt+":业务范围")
1268
+ longtext=df6.iloc[0]["主营业务"]
1269
+ print_sentence(longtext,mid_symbol=[';','。'])
1270
+
1271
+ print("\n*****",titletxt+":经营范围")
1272
+ longtext=df6.iloc[0]["经营范围"]
1273
+ print_sentence(longtext,mid_symbol=[';','。'])
1274
+
1275
+ print("\n*****",titletxt+":机构简介")
1276
+ longtext=df6.iloc[0]["机构简介"]
1277
+ print_sentence(longtext,mid_symbol=[';','。'])
1278
+
1279
+ # 主营业务信息查询=============================================================================
1280
+ # 主营业务仅在年报/中报中公布,一三季报中无此信息
1281
+ import numpy as np
1282
+ if category == 'business':
1283
+ try:
1284
+ df2=ak.stock_zygc_em(symbol=sxticker)
1285
+ except:
1286
+ print(" #Warning(stock_profile_china): fetching business info failed for",ticker)
1287
+ return
1288
+
1289
+ df2['分类类型']=df2['分类类型'].apply(lambda x: '按业务分类' if x in [np.nan,None] else x)
1290
+ df2['报告日期']=df2['报告日期'].apply(lambda x: x.strftime("%Y-%m-%d"))
1291
+ # 整理信息
1292
+ df2['报告年度']=df2['报告日期'].apply(lambda x: x[:4])
1293
+ df2['报告月日']=df2['报告日期'].apply(lambda x: x[-5:])
1294
+ df2['报告类别']=df2['报告月日'].apply(lambda x: '年度' if x=='12-31' else '中期' if x=='06-30' else '季度')
1295
+
1296
+ if business_period in ['annual','recent']:
1297
+ if business_period == 'annual': #最近一期年报
1298
+ df2a=df2[df2['报告类别']=='年度'].copy(deep=True)
1299
+ df2a.reset_index(drop=True,inplace=True)
1300
+ if business_period == 'recent': #最近一期年报/中报
1301
+ df2a=df2.copy(deep=True)
1302
+
1303
+ period=df2a.head(1)['报告日期'][0]
1304
+ else: #具体中报或年报日期
1305
+ result,business_period1=check_date2(business_period)
1306
+ if result:
1307
+ df2a=df2[df2['报告日期']==business_period1].copy(deep=True)
1308
+ if len(df2a) > 0:
1309
+ df2a.reset_index(drop=True,inplace=True)
1310
+ period=df2a.head(1)['报告日期'][0]
1311
+ else:
1312
+ print(" #Warning(stock_profile_china): invalid business period for",business_period)
1313
+ print(" Valid business_period: annual, recent, or an valid mid-term/annual report date, eg 2022-12-31 or 2022-6-30")
1314
+ return
1315
+ else:
1316
+ print(" #Warning(stock_profile_china): invalid business period for",business_period)
1317
+ print(" Valid business_period: annual, recent, or an valid mid-term/annual report date, eg 2022-12-31 or 2022-6-30")
1318
+ return
1319
+
1320
+ dftmp=df2[df2['报告日期']==period]
1321
+ cols1=['主营构成','主营收入','收入比例','主营成本','成本比例','主营利润','利润比例','毛利率']
1322
+ #cols2=['分类方向','分类','营业收入-同比增长','营业成本-同比增长','毛利率','毛利率-同比增长']
1323
+
1324
+ for c in cols1:
1325
+ if c in ['主营收入','主营成本','主营利润']:
1326
+ dftmp[c]=dftmp[c].apply(lambda x: round(x/yi,2))
1327
+ if c in ['收入比例','成本比例','利润比例','毛利率']:
1328
+ dftmp[c]=dftmp[c].apply(lambda x: round(x*100,2))
1329
+ dftmp.rename(columns={c:c+'%'},inplace=True)
1330
+
1331
+ cols1p=['主营构成','主营收入','收入比例%','主营成本','成本比例%','主营利润','利润比例%','毛利率%']
1332
+ dftmp1a=dftmp[dftmp['分类类型']=='按业务分类'][cols1p]
1333
+ dftmp1b=dftmp[dftmp['分类类型']=='按地区分类'][cols1p]
1334
+
1335
+ titletxt1a=ticker_name(ticker,'stock')+':主营业务构成,按业务分类,单位:亿元,'+period
1336
+ titletxt1b=ticker_name(ticker,'stock')+':主营业务构成,按地区分类,单位:亿元,'+period
1337
+ """
1338
+ if prettytab:
1339
+ pandas2prettytable(dftmp1,titletxt1,firstColSpecial=True,leftColAlign='l',otherColAlign='c',tabborder=tabborder)
1340
+ print(' ','数据来源:益盟-F10,',str(today))
1341
+ else:
1342
+ print('\n*** '+titletxt1+'\n')
1343
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=['left','left','right','right','right','right','right']))
1344
+ print('\n数据来源:益盟-F10,',str(today))
1345
+ """
1346
+ footnote=''
1347
+ df_display_CSS(df=dftmp1a,titletxt=titletxt1a,footnote=footnote, \
1348
+ first_col_align='left',second_col_align='right', \
1349
+ facecolor=facecolor,decimals=2, \
1350
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1351
+ data_font_size=data_font_size)
1352
+
1353
+ print('')
1354
+ footnote='数据来源:东方财富,'+str(stoday)
1355
+ df_display_CSS(df=dftmp1b,titletxt=titletxt1b,footnote=footnote, \
1356
+ first_col_align='left',second_col_align='right', \
1357
+ facecolor=facecolor,decimals=2, \
1358
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1359
+ data_font_size=data_font_size)
1360
+
1361
+ #dftmp2=dftmp[cols2]
1362
+ #titletxt2=ticker_name(ticker,'stock')+':主营业务增长,'+period
1363
+ """
1364
+ if prettytab:
1365
+ pandas2prettytable(dftmp2,titletxt2,firstColSpecial=True,leftColAlign='l',otherColAlign='c',tabborder=tabborder)
1366
+ print(' ','数据来源:益盟-F10,',str(today))
1367
+ else:
1368
+ print('\n*** '+titletxt2+'\n')
1369
+ print(dftmp2.to_markdown(tablefmt='Simple',index=False,colalign=['left','left','right','right','right','right']))
1370
+ print('\n数据来源:益盟-F10,',str(today))
1371
+ """
1372
+ """
1373
+ df_display_CSS(df=dftmp2,titletxt=titletxt2,footnote=footnote, \
1374
+ first_col_align='left',second_col_align='left', \
1375
+ facecolor=facecolor,decimals=2, \
1376
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1377
+ data_font_size=data_font_size)
1378
+ """
1379
+ # 历史分红信息查询=============================================================================
1380
+ """
1381
+ if category == 'dividend':
1382
+ try:
1383
+ df3=ak.stock_dividents_cninfo(symbol=ticker1)
1384
+ except:
1385
+ try:
1386
+ # 测试是否akshare本身出现问题
1387
+ tmpdf3=ak.stock_dividents_cninfo(symbol='600519')
1388
+ except:
1389
+ # akshare本身出现问题
1390
+ print(" #Warning(stock_profile_china): problem incurred for akshare")
1391
+ print(" Try upgrade akshare using: pip install akshare --upgrade")
1392
+ print(" If same problem remains, try upgrade akshare again later")
1393
+ return
1394
+ print(" #Warning(stock_profile_china): dividend info not found for",ticker)
1395
+ return
1396
+
1397
+ # 整理信息
1398
+ df3.fillna('',inplace=True)
1399
+ dftmp=df3.copy(deep=True)
1400
+ dftmp.drop(['实施方案公告日期','股份到账日'],axis=1,inplace=True)
1401
+ #del dftmp['分红类型']
1402
+ #del dftmp['报告时间']
1403
+ #dftmp.drop(['送股比例','转增比例','派息比例'],axis=1,inplace=True)
1404
+
1405
+ newcols=['报告时间','送股比例','转增比例','派息比例','股权登记日','除权日','派息日','实施方案分红说明']
1406
+ dftmp1=dftmp[newcols]
1407
+
1408
+ # 替换送转派息字段中的零为空,全局替换
1409
+ dftmp2=dftmp1.replace(0,'')
1410
+ dftmp3=dftmp2.replace('','--')
1411
+
1412
+ titletxt=ticker_name(ticker)+':股利发放历史'
1413
+ if prettytab:
1414
+ pandas2prettytable(dftmp3,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='c',tabborder=tabborder)
1415
+ print(' ','数据来源:巨潮资讯,',str(today))
1416
+ else:
1417
+ print('\n*** '+titletxt+'\n')
1418
+ print(dftmp3.to_markdown(tablefmt='Simple',index=False,colalign=['left','center','center','right','center','center','center','left']))
1419
+ print('\n数据来源:巨潮资讯,',str(today))
1420
+ """
1421
+ #if category in ['dividend','split']:
1422
+ if any(s in category for s in ['dividend','split']):
1423
+ tickername=ticker_name(ticker,'stock')
1424
+ # 分红
1425
+ titletxt=tickername+':分红历史'
1426
+ try:
1427
+ #df3=ak.stock_dividents_cninfo(symbol=ticker1)
1428
+ df3=ak.stock_history_dividend_detail(symbol=ticker1, indicator="分红")
1429
+ except:
1430
+ print('')
1431
+ """
1432
+ print(titletxt)
1433
+ print(" #Warning(stock_profile_china): dividend info not found for stock",ticker)
1434
+ """
1435
+ print(f" {tickername}:no dividend info found after {start}")
1436
+ return
1437
+
1438
+ if len(df3)==0:
1439
+ """
1440
+ print('')
1441
+ print(titletxt)
1442
+ print(" No dividend record found for stock",ticker)
1443
+ """
1444
+ print(f" {tickername}:no dividend info found after {start}")
1445
+ return
1446
+
1447
+ # 整理信息
1448
+ dftmp=df3[df3['进度']=='实施']
1449
+ #dftmp.drop(['进度','红股上市日'],axis=1,inplace=True)
1450
+ dftmp.drop(['进度'],axis=1,inplace=True)
1451
+ dftmp.replace(0,'-',inplace=True)
1452
+ import numpy as np
1453
+ dftmp.replace(np.nan,'-',inplace=True)
1454
+
1455
+ newcols=['公告日期','送股','转增','派息','股权登记日','除权除息日','红股上市日']
1456
+ dftmp3=dftmp[newcols]
1457
+
1458
+ import pandas as pd
1459
+ startpd=pd.Timestamp(start)
1460
+ dftmp4=dftmp3[dftmp3['公告日期'] >= startpd.date()]
1461
+
1462
+ if len(dftmp4) == 0:
1463
+ print(f" {tickername}:no dividend info found after {start}")
1464
+ else:
1465
+ titletxt=ticker_name(ticker,'stock')+': '+text_lang('分红历史','Dividend History')
1466
+ """
1467
+ if prettytab:
1468
+ pandas2prettytable(dftmp3,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='c',tabborder=tabborder)
1469
+ print('【注】送股/转增:股数/10股,派息:元(税前)/10股,数据来源:新浪财经,',str(today))
1470
+ else:
1471
+ print('\n*** '+titletxt+'\n')
1472
+ alignlist=['center']+['right']*(len(list(dftmp3))-1)
1473
+ print(dftmp3.to_markdown(tablefmt='Simple',index=False,colalign=alignlist))
1474
+ print('【注】送股/转增:股数/10股,派息:元(税前)/10股,数据来源:新浪财经,',str(today))
1475
+ """
1476
+ footnotecn='【注】送股/转增:股数/10股,派息(元,税前)/10股,数据来源:新浪财经,'+str(stoday)
1477
+ footnoteen='[Note]Stock div/capitalization/cash div(RMB, pre-tax) per 10 shares, data source: Sina Finance, '+str(stoday)
1478
+ footnote=text_lang(footnotecn,footnoteen)
1479
+
1480
+ if check_language() == 'English':
1481
+ dftmp4.rename(columns={'公告日期':'Disclosure','送股':'Stock Div', \
1482
+ '转增':'Capitalization','派息':'Cash Div', \
1483
+ '股权登记日':'Record','除权除息日':'Ex-Dividend', \
1484
+ '红股上市日':'Stock Div Listing'},inplace=True)
1485
+
1486
+ df_display_CSS(df=dftmp4,titletxt=titletxt,footnote=footnote, \
1487
+ first_col_align='center',second_col_align='center', \
1488
+ last_col_align='center',other_col_align='center', \
1489
+
1490
+ facecolor=facecolor,decimals=2, \
1491
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1492
+ data_font_size=data_font_size)
1493
+
1494
+ # 配股
1495
+ titletxt=ticker_name(ticker,'stock')+': '+text_lang('配股历史','Rights Issue History')
1496
+ try:
1497
+ df3p=ak.stock_history_dividend_detail(symbol=ticker1, indicator="配股")
1498
+ except:
1499
+ """
1500
+ print('')
1501
+ print(titletxt)
1502
+ print(" #Warning(stock_profile_china): allotment info not found for stock",ticker)
1503
+ """
1504
+ #print(f"\n {tickername}:{start}后未找到配股信息")
1505
+ return
1506
+
1507
+ if len(df3p)==0:
1508
+ """
1509
+ print('')
1510
+ print(titletxt)
1511
+ print(" #Warning(stock_profile_china): no allotment info found for stock",ticker)
1512
+ """
1513
+ #print(f"\n {tickername}:{start}后未找到配股信息")
1514
+ return
1515
+
1516
+ # 整理信息
1517
+ dftmp=df3p[df3p['基准股本']!=0]
1518
+ dftmp.drop(['基准股本','募集资金合计'],axis=1,inplace=True)
1519
+
1520
+ newcols=['公告日期','配股方案','配股价格','股权登记日','除权日','缴款起始日','缴款终止日','配股上市日']
1521
+ dftmp3=dftmp[newcols]
1522
+
1523
+ dftmp4=dftmp3[dftmp3['公告日期'] >= startpd.date()]
1524
+ if len(dftmp4) == 0:
1525
+ #print(f"\n {tickername}:{start}后未找到配股信息")
1526
+ return
1527
+ else:
1528
+ """
1529
+ if prettytab:
1530
+ pandas2prettytable(dftmp3,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='c',tabborder=tabborder)
1531
+ print('【注】配股方案:每10股的配股数,配股价格为元。数据来源:新浪财经,',str(today))
1532
+ else:
1533
+ print('\n*** '+titletxt+'\n')
1534
+ alignlist=['center']+['right']*(len(list(dftmp3))-1)
1535
+ print(dftmp3.to_markdown(tablefmt='Simple',index=False,colalign=alignlist))
1536
+ print('【注】配股方案:每10股的配股数,配股价格为元。数据来源:新浪财经,',str(today))
1537
+ """
1538
+ footnotecn='【注】配股方案:每10股的配股数,配股价格为元。数据来源:新浪财经,'+str(stoday)
1539
+ footnoteen='Rights issue ratio: per 10 shares, subscription price in RMB, data source: Sina Finance, '+str(stoday)
1540
+ footnote=text_lang(footnotecn,footnoteen)
1541
+
1542
+ if check_language() == 'English':
1543
+ dftmp4.rename(columns={'公告日期':'Disclosure','配股方案':'Rights Issue Ratio', \
1544
+ '配股价格':'Subscription Price','股权登记日':'Record', \
1545
+ '除权日':'Ex-Rights','缴款起始日':'Subscription Start', \
1546
+ '缴款终止日':'Subscription End','配股上市日':'Rights Listing'},inplace=True)
1547
+
1548
+ df_display_CSS(df=dftmp4,titletxt=titletxt,footnote=footnote, \
1549
+ first_col_align='center',second_col_align='center', \
1550
+ last_col_align='center',other_col_align='center', \
1551
+
1552
+ facecolor=facecolor,decimals=2, \
1553
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1554
+ data_font_size=data_font_size)
1555
+
1556
+
1557
+ # 主要股东信息查询=============================================================================
1558
+ #if category in ['shareholder','investor']:
1559
+ if any(s in category for s in ['shareholder','investor']):
1560
+ try:
1561
+ df4=ak.stock_main_stock_holder(stock=ticker1)
1562
+ except:
1563
+ print(" #Warning(stock_profile_china): shareholder info not found for",ticker)
1564
+ return
1565
+
1566
+ # 整理信息
1567
+ #df4.fillna('',inplace=True)
1568
+ df4.fillna(0,inplace=True)
1569
+
1570
+ #df4['报告年度']=df4['截至日期'].apply(lambda x: x.year)
1571
+ df4['报告年度']=df4['截至日期'].apply(lambda x: x.strftime("%Y"))
1572
+ df4['报告月日']=df4['截至日期'].apply(lambda x: x.strftime("%m-%d"))
1573
+ df4['报告类别']=df4['报告月日'].apply(lambda x: '年度' if x=='12-31' else '中期' if x=='06-30' else '季度')
1574
+
1575
+ if business_period in ['annual','recent']:
1576
+ if business_period == 'annual': #最近一期年报
1577
+ df4a=df4[df4['报告类别']=='年度'].copy(deep=True)
1578
+ df4a.reset_index(drop=True,inplace=True)
1579
+ if business_period == 'recent': #最近一期年报/中报
1580
+ df4a=df4.copy(deep=True)
1581
+
1582
+ period=df4a.head(1)['截至日期'][0]
1583
+ else: #具体财报日期
1584
+ result,business_period1=check_date2(business_period)
1585
+ if result:
1586
+ # 转换为字符串类型,否则比较失败
1587
+ df4['截至日期1']=df4['截至日期'].apply(lambda x:str(x))
1588
+ df4a=df4[df4['截至日期1']==business_period1].copy(deep=True)
1589
+ if len(df4a) > 0:
1590
+ df4a.reset_index(drop=True,inplace=True)
1591
+ period=df4a.head(1)['截至日期'][0]
1592
+ else:
1593
+ print(" #Warning(stock_profile_china): invalid business period for",business_period)
1594
+ print(" Valid business_period: annual, recent, or an valid mid-term/annual report date, eg 2022-12-31")
1595
+ return
1596
+ else:
1597
+ print(" #Warning(stock_profile_china): invalid business period for",business_period)
1598
+ print(" Valid business_period: annual, recent, or an valid mid-term/annual report date, eg 2022-6-30")
1599
+ return
1600
+
1601
+ #df4b=df4a[df4a['持股比例'] != 0]
1602
+ #df4a['持股比例'].replace(0,'unknown',inplace=True)
1603
+ #df4a['持股比例'].replace(0,'?',inplace=True)
1604
+ df4a['持股比例'].replace(0,'--',inplace=True)
1605
+ dftmp=df4a.head(10).copy(deep=True)
1606
+
1607
+ enddate=str(dftmp.head(1)['截至日期'][0])
1608
+ shareholder_num=dftmp.head(1)['股东总数'][0]
1609
+ avg_shares=dftmp.head(1)['平均持股数'][0]
1610
+ titletxt=ticker_name(ticker,'stock')+':十大股东(截至'+str(enddate)+',股东总数'+str(int(shareholder_num))+',平均持股数'+str(int(avg_shares))+')'
1611
+
1612
+ dftmp.drop(['截至日期','公告日期','股东说明','股东总数','平均持股数'],axis=1,inplace=True)
1613
+
1614
+ #dftmp['持股数量(股)']=dftmp['持股数量(股)'].apply(lambda x: mstring2number(x))
1615
+ #dftmp['持股数量']=dftmp['持股数量'].apply(lambda x: mstring2number(x))
1616
+ dftmp['持股数量']=dftmp['持股数量'].apply(lambda x: float(x))
1617
+ #dftmp['持股数量(百万股)']=dftmp['持股数量(股)'].apply(lambda x: round(x / baiwan,2))
1618
+ dftmp['持股数量(百万股)']=dftmp['持股数量'].apply(lambda x: round(x / baiwan,2))
1619
+
1620
+ #dftmp['持股比例(%)']=dftmp['持股比例'].apply(lambda x: mstring2number(x,'float'))
1621
+ dftmp['持股比例(%)']=dftmp['持股比例']
1622
+
1623
+ # 检查持股比例是否异常
1624
+ check_holding=dftmp.head(1)['持股比例'][0]
1625
+ if check_holding ==0.0:
1626
+ print(" #Warning(stock_profile_china): shareholder holding info seems weired")
1627
+ dftmp=dftmp.replace(0,'---')
1628
+
1629
+ #newcols=['编号','股东名称','股本性质','持股比例(%)','持股数量(百万股)']
1630
+ newcols=['编号','股东名称','股本性质','持股比例(%)']
1631
+ dftmp1=dftmp[newcols]
1632
+ """
1633
+ if prettytab:
1634
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='c',otherColAlign='c',tabborder=tabborder)
1635
+ print(' ','数据来源:新浪财经,',str(today))
1636
+ else:
1637
+ print('\n*** '+titletxt+'\n')
1638
+ #print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=['center','left','left','right','right']))
1639
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=['center','left','left','right']))
1640
+ print('\n数据来源:新浪财经,',str(today))
1641
+ """
1642
+ footnote='数据来源:新浪财经,'+str(stoday)
1643
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
1644
+ first_col_align='center',second_col_align='left', \
1645
+ facecolor=facecolor,decimals=2, \
1646
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size, \
1647
+ data_font_size=data_font_size)
1648
+
1649
+
1650
+ # 主要市场指标查询=============================================================================
1651
+ #if category == 'valuation':
1652
+ if any(s in category for s in ['valuation','market']):
1653
+ try:
1654
+ #df5=ak.stock_a_lg_indicator(symbol=ticker1)
1655
+ df5=ak.stock_a_indicator_lg(symbol=ticker1)
1656
+ except:
1657
+ print(" #Warning(stock_profile_china): valuation spyder failed or info not found for",ticker)
1658
+ return
1659
+
1660
+ # 整理信息
1661
+ import pandas as pd
1662
+ #startpd=pd.to_datetime(start)
1663
+ startpd=pd.Timestamp(start)
1664
+
1665
+ dftmp=df5.copy(deep=True)
1666
+ dftmp1=dftmp.set_index('trade_date')
1667
+ dftmp2=dftmp1[dftmp1.index >= startpd.date()]
1668
+
1669
+ # 总市值转换为亿元
1670
+ dftmp2['total_mv(yi)']=dftmp2['total_mv'] / wan #原单位为万元
1671
+
1672
+ # 计算总市值的均值,中位数、最大最小值
1673
+ mv_mean=round(dftmp2['total_mv(yi)'].mean(),1)
1674
+ mv_median=round(dftmp2['total_mv(yi)'].median(),1)
1675
+ mv_max=round(dftmp2['total_mv(yi)'].max(),1)
1676
+ mv_min=round(dftmp2['total_mv(yi)'].min(),1)
1677
+ mv_txt="总市值(亿元):"+str(mv_min)+'-'+str(mv_max)+",均值"+str(mv_mean)+",中位数"+str(mv_median)
1678
+
1679
+ titletxt=ticker_name(ticker,'stock')+':估值与市值'
1680
+ import datetime as dt
1681
+ today=dt.date.today()
1682
+ footnote3="数据来源:乐咕乐股,"+str(stoday)
1683
+
1684
+ # 计算市盈率的均值,中位数、最大最小值
1685
+ #va='pe'; va_name="市盈率"
1686
+ va='pe_ttm'; va_name="市盈率TTM"
1687
+ va_mean=round(dftmp2[va].mean(),1)
1688
+ va_median=round(dftmp2[va].median(),1)
1689
+ va_max=round(dftmp2[va].max(),1)
1690
+ va_min=round(dftmp2[va].min(),1)
1691
+ va_txt=va_name+":"+str(va_min)+'-'+str(va_max)+",均值"+str(va_mean)+",中位数"+str(va_median)
1692
+
1693
+ footnote=va_txt+";"+mv_txt+"\n"+footnote3
1694
+
1695
+ # 市盈率与总市值
1696
+ if DEBUG:
1697
+ display(dftmp2)
1698
+ """
1699
+ plot2_line2(df1=dftmp2,ticker1='',colname1=va,label1=va_name, \
1700
+ df2=dftmp2,ticker2='',colname2='total_mv(yi)',label2='总市值(亿元)', \
1701
+ ylabeltxt='',titletxt=titletxt,footnote=footnote, \
1702
+ date_range=True,date_freq='Q',date_fmt='%Y-%m',twinx=True, \
1703
+ resample_freq='D', \
1704
+ loc1=loc1,loc2=loc2, \
1705
+ color1='red',color2='blue',facecolor=facecolor)
1706
+ """
1707
+ plot_line2(df1=dftmp2,ticker1='',colname1=va,label1=va_name, \
1708
+ df2=dftmp2,ticker2='',colname2='total_mv(yi)',label2='总市值(亿元)', \
1709
+ ylabeltxt='',titletxt=titletxt,footnote=footnote, \
1710
+ twinx=True, \
1711
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1712
+ color1='red',color2='blue',facecolor='whitesmoke')
1713
+
1714
+ # 计算市净率的均值,中位数、最大最小值
1715
+ va='pb'; va_name="市净率"
1716
+ va_mean=round(dftmp2[va].mean(),1)
1717
+ va_median=round(dftmp2[va].median(),1)
1718
+ va_max=round(dftmp2[va].max(),1)
1719
+ va_min=round(dftmp2[va].min(),1)
1720
+ va_txt=va_name+":"+str(va_min)+'-'+str(va_max)+",均值"+str(va_mean)+",中位数"+str(va_median)
1721
+
1722
+ footnote=va_txt+";"+mv_txt+"\n"+footnote3
1723
+
1724
+ # 市净率与总市值
1725
+ """
1726
+ plot2_line2(dftmp2,'',va,va_name, \
1727
+ dftmp2,'','total_mv(yi)','总市值(亿元)', \
1728
+ '',titletxt,footnote, \
1729
+ date_range=True,date_freq='Q',date_fmt='%Y-%m',twinx=True, \
1730
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1731
+ color1='red',color2='blue',facecolor=facecolor)
1732
+ """
1733
+ plot_line2(df1=dftmp2,ticker1='',colname1=va,label1=va_name, \
1734
+ df2=dftmp2,ticker2='',colname2='total_mv(yi)',label2='总市值(亿元)', \
1735
+ ylabeltxt='',titletxt=titletxt,footnote=footnote, \
1736
+ twinx=True, \
1737
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1738
+ color1='red',color2='blue',facecolor='whitesmoke')
1739
+
1740
+ # 计算市销率的均值,中位数、最大最小值
1741
+ va='ps_ttm'; va_name="市销率TTM"
1742
+ va_mean=round(dftmp2[va].mean(),1)
1743
+ va_median=round(dftmp2[va].median(),1)
1744
+ va_max=round(dftmp2[va].max(),1)
1745
+ va_min=round(dftmp2[va].min(),1)
1746
+ va_txt=va_name+":"+str(va_min)+'-'+str(va_max)+",均值"+str(va_mean)+",中位数"+str(va_median)
1747
+
1748
+ footnote=va_txt+";"+mv_txt+"\n"+footnote3
1749
+
1750
+ # 市销率与总市值
1751
+ """
1752
+ plot2_line2(dftmp2,'',va,va_name, \
1753
+ dftmp2,'','total_mv(yi)','总市值(亿元)', \
1754
+ '',titletxt,footnote, \
1755
+ date_range=True,date_freq='Q',date_fmt='%Y-%m',twinx=True, \
1756
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1757
+ color1='red',color2='blue',facecolor=facecolor)
1758
+ """
1759
+ plot_line2(df1=dftmp2,ticker1='',colname1=va,label1=va_name, \
1760
+ df2=dftmp2,ticker2='',colname2='total_mv(yi)',label2='总市值(亿元)', \
1761
+ ylabeltxt='',titletxt=titletxt,footnote=footnote, \
1762
+ twinx=True, \
1763
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1764
+ color1='red',color2='blue',facecolor='whitesmoke')
1765
+
1766
+ # 计算股息率的均值,中位数、最大最小值
1767
+ #va='dv_ratio'; va_name="股息率"
1768
+ va='dv_ttm'; va_name="股息率TTM"
1769
+ va_mean=round(dftmp2[va].mean(),1)
1770
+ va_median=round(dftmp2[va].median(),1)
1771
+ va_max=round(dftmp2[va].max(),1)
1772
+ va_min=round(dftmp2[va].min(),1)
1773
+ va_txt=va_name+"%:"+str(va_min)+'-'+str(va_max)+",均值"+str(va_mean)+",中位数"+str(va_median)
1774
+
1775
+ footnote=va_txt+";"+mv_txt+"\n"+footnote3
1776
+
1777
+ # 股息率与总市值
1778
+ """
1779
+ plot2_line2(dftmp2,'',va,va_name+'%', \
1780
+ dftmp2,'','total_mv(yi)','总市值(亿元)', \
1781
+ '',titletxt,footnote, \
1782
+ date_range=True,date_freq='Q',date_fmt='%Y-%m',twinx=True, \
1783
+ loc1=loc1,loc2=loc2, \
1784
+ color1='red',color2='blue',facecolor=facecolor)
1785
+ """
1786
+ plot_line2(df1=dftmp2,ticker1='',colname1=va,label1=va_name+'%', \
1787
+ df2=dftmp2,ticker2='',colname2='total_mv(yi)',label2='总市值(亿元)', \
1788
+ ylabeltxt='',titletxt=titletxt,footnote=footnote, \
1789
+ twinx=True, \
1790
+ resample_freq='D',loc1=loc1,loc2=loc2, \
1791
+ color1='red',color2='blue',facecolor='whitesmoke')
1792
+
1793
+
1794
+ # 财务基本面指标查询=============================================================================
1795
+ #if category == 'financial':
1796
+ if any(s in category for s in ['financial','healthy']):
1797
+
1798
+ try:
1799
+ df7=ak.stock_financial_analysis_indicator(symbol=ticker1)
1800
+ #其中的日期为datetime.date object类型
1801
+ df7['日期']=df7['日期'].apply(lambda x: x.strftime("%Y-%m-%d"))
1802
+ #降序排列
1803
+ df7.sort_values(by=['日期'],ascending=False,inplace=True)
1804
+ except:
1805
+ print(" #Warning(stock_detail_china):financial info not found for",ticker)
1806
+ return
1807
+
1808
+ df7['财报类别']=df7['日期'].apply(lambda x: x[5:])
1809
+ # 注意:lambda中若使用if就同时要规定else
1810
+ df7['财报类别']=df7['财报类别'].apply(lambda x: '三季度报' if x=='09-30' else x)
1811
+ df7['财报类别']=df7['财报类别'].apply(lambda x: '中报' if x=='06-30' else x)
1812
+ df7['财报类别']=df7['财报类别'].apply(lambda x: '一季度报' if x=='03-31' else x)
1813
+ df7['财报类别']=df7['财报类别'].apply(lambda x: '年报' if x=='12-31' else x)
1814
+
1815
+ # 整理信息:近两年,最多8个季度,再多会产生格式错位
1816
+ numOfQ=financial_quarters
1817
+ if numOfQ > 8:
1818
+ numOfQ=8
1819
+
1820
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,每股指标(元)"
1821
+ df7['加权每股收益']=df7['加权每股收益(元)']
1822
+ df7['每股收益_调整后']=df7['每股收益_调整后(元)']
1823
+ df7['扣非后每股收益']=df7['扣除非经常性损益后的每股收益(元)']
1824
+ df7['每股净资产_调整后']=df7['每股净资产_调整后(元)']
1825
+ df7['每股经营性现金流']=df7['每股经营性现金流(元)']
1826
+ df7['每股资本公积金']=df7['每股资本公积金(元)']
1827
+ df7['每股未分配利润']=df7['每股未分配利润(元)']
1828
+
1829
+ colList=['日期','财报类别','加权每股收益','每股收益_调整后','扣非后每股收益', \
1830
+ '每股净资产_调整后','每股经营性现金流','每股未分配利润']
1831
+ dftmp=df7[colList].head(numOfQ)
1832
+
1833
+ # 为应对更多的字段,转置矩阵打印
1834
+ dftmp.set_index('日期',inplace=True)
1835
+ dftmp1=dftmp.T
1836
+ dftmp1.reset_index(inplace=True)
1837
+ dftmp1.rename(columns={'index':'项目'},inplace=True)
1838
+ """
1839
+ if prettytab:
1840
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
1841
+ print(' ','数据来源:新浪财经,',str(today))
1842
+ else:
1843
+ print('\n*** '+titletxt+'\n')
1844
+ colalignList=['left','right','right','right','right','right','right','right','right']
1845
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
1846
+ print('\n数据来源:新浪财经,',str(today))
1847
+ """
1848
+ footnote='数据来源:新浪财经,'+str(stoday)
1849
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
1850
+ facecolor=facecolor,decimals=2, \
1851
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
1852
+ data_font_size=data_font_size_small)
1853
+
1854
+ """
1855
+ 加权平均每股收益:
1856
+ 指计算时股份数用按月对总股数加权计算的数据,理由是由于公司投入的资本和资产不同,收益产生的基础也不同。
1857
+
1858
+ 摊薄每股收益:
1859
+ 指按年末的普通股总数计算出来的每股收益,它等于净利润除以年末总股本。
1860
+
1861
+ 调整后的每股收益:又称稀释每股收益
1862
+ 有的公司发行了可转债、认购权证、股票期权等,那么在计算每股收益的时候,就有调整前后了。
1863
+ 调整前的,不考虑这些可能导致的股份增加;调整后,要考虑导致股份增加后的情况。
1864
+
1865
+ 稀释每股收益是以基本每股收益为基础,假设企业所有发行在外的稀释性潜在普通股均已转换为普通股,
1866
+ 从而分别调整归属于普通股股东的当期净利润以及发行在外普通股的加权平均数计算而得的每股收益。
1867
+ 比如某个公司有权证、可转债、即将执行的股权激励,就意味着股份有潜在的增加可能,
1868
+ 为了准确评估每股收益,就必须用稀释每股收益。
1869
+ """
1870
+
1871
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,利润与成本"
1872
+ df7['扣非后净利润(元)']=df7['扣除非经常性损益后的净利润(元)']
1873
+ colList=['日期','财报类别','总资产利润率(%)','主营业务利润率(%)','总资产净利润率(%)','成本费用利润率(%)', \
1874
+ '营业利润率(%)','主营业务成本率(%)','销售净利率(%)', '销售毛利率(%)','三项费用比重','非主营比重', \
1875
+ '主营利润比重', '主营业务利润(元)', '扣非后净利润(元)']
1876
+ dftmp=df7[colList].head(numOfQ)
1877
+
1878
+ # 去掉全列为空的字段
1879
+ dftmpCols=list(dftmp)
1880
+ for f in dftmpCols:
1881
+ fnum=len(set(dftmp[f].tolist()))
1882
+ if fnum == 1:
1883
+ del dftmp[f]
1884
+
1885
+ dftmp['主营业务利润(百万元)']=dftmp['主营业务利润(元)'].apply(lambda x: round(float(x)/baiwan,4))
1886
+ dftmp['扣非后净利润(百万元)']=dftmp['扣非后净利润(元)'].apply(lambda x: round(float(x)/baiwan,4))
1887
+ dftmp.drop(['总资产利润率(%)','总资产净利润率(%)','主营业务利润(元)','扣非后净利润(元)'],axis=1,inplace=True)
1888
+
1889
+ # 为应对更多的字段,转置矩阵打印
1890
+ dftmp.set_index('日期',inplace=True)
1891
+ dftmp1=dftmp.T
1892
+ dftmp1.reset_index(inplace=True)
1893
+ dftmp1.rename(columns={'index':'项目'},inplace=True)
1894
+ """
1895
+ if prettytab:
1896
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
1897
+ print(' ','数据来源:新浪财经,',str(today))
1898
+ else:
1899
+ print('\n*** '+titletxt+'\n')
1900
+ colalignList=['left','right','right','right','right','right','right','right','right']
1901
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
1902
+ print('\n数据来源:新浪财经,',str(today))
1903
+ """
1904
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
1905
+ facecolor=facecolor,decimals=2, \
1906
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
1907
+ data_font_size=data_font_size_small)
1908
+
1909
+
1910
+ """
1911
+ 总资产利润率=利润总额/平均总资产
1912
+ 总资产净利润率=净利润/平均总资产
1913
+ 净利润 = 利润总额 - 所得税费用
1914
+
1915
+ 通常,总资产利润率 > 总资产净利润率(不包括主营业务亏损时)
1916
+
1917
+ 成本费用利润率=利润总额/成本费用总额(即成本总额+费用总额)
1918
+ """
1919
+
1920
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,报酬与收益"
1921
+ colList=['日期','财报类别', '股本报酬率(%)','净资产报酬率(%)','资产报酬率(%)', '股息发放率(%)','投资收益率(%)', \
1922
+ '净资产收益率(%)','加权净资产收益率(%)']
1923
+ dftmp=df7[colList].head(numOfQ)
1924
+
1925
+ # 去掉全列为空的字段
1926
+ dftmpCols=list(dftmp)
1927
+ for f in dftmpCols:
1928
+ fnum=len(set(dftmp[f].tolist()))
1929
+ if fnum == 1:
1930
+ del dftmp[f]
1931
+
1932
+ finalCols=['日期','财报类别','资产报酬率(%)','净资产报酬率(%)']
1933
+ dftmp1=dftmp[finalCols]
1934
+
1935
+ # 为应对更多的字段,转置矩阵打印
1936
+ dftmp1.set_index('日期',inplace=True)
1937
+ dftmp2=dftmp1.T
1938
+ dftmp2.reset_index(inplace=True)
1939
+ dftmp2.rename(columns={'index':'项目'},inplace=True)
1940
+ """
1941
+ if prettytab:
1942
+ pandas2prettytable(dftmp2,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
1943
+ print(' ','数据来源:新浪财经,',str(today))
1944
+ else:
1945
+ print('\n*** '+titletxt+'\n')
1946
+ colalignList=['left','right','right','right','right','right','right','right','right']
1947
+ print(dftmp2.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
1948
+ print('\n数据来源:新浪财经,',str(today))
1949
+ """
1950
+ df_display_CSS(df=dftmp2,titletxt=titletxt,footnote=footnote, \
1951
+ facecolor=facecolor,decimals=2, \
1952
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
1953
+ data_font_size=data_font_size_small)
1954
+
1955
+
1956
+ """
1957
+ 净资产收益率=净利润/净资产。净资产=所有者权益+少数股东权益
1958
+
1959
+ 净资产报酬率=净利润/平均净资产总额。平均净资产总额=期初期末净资产总额平均值
1960
+
1961
+ 股本报酬率/回报率=净利润/期初期末总股本的均值。股本是实收资本,而股权是股东权益。
1962
+ """
1963
+
1964
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,增长率"
1965
+ colList=['日期','财报类别','主营业务收入增长率(%)','净利润增长率(%)','总资产增长率(%)','净资产增长率(%)']
1966
+ dftmp=df7[colList].head(numOfQ)
1967
+
1968
+ # 去掉全列为空的字段
1969
+ dftmpCols=list(dftmp)
1970
+ for f in dftmpCols:
1971
+ fnum=len(set(dftmp[f].tolist()))
1972
+ if fnum == 1:
1973
+ del dftmp[f]
1974
+
1975
+ # 为应对更多的字段,转置矩阵打印
1976
+ dftmp.set_index('日期',inplace=True)
1977
+ dftmp1=dftmp.T
1978
+ dftmp1.reset_index(inplace=True)
1979
+ dftmp1.rename(columns={'index':'项目'},inplace=True)
1980
+ """
1981
+ if prettytab:
1982
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
1983
+ print(' ','数据来源:新浪财经,',str(today))
1984
+ else:
1985
+ print('\n*** '+titletxt+'\n')
1986
+ colalignList=['left','right','right','right','right','right','right','right','right']
1987
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
1988
+ print('\n数据来源:新浪财经,',str(today))
1989
+
1990
+ """
1991
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
1992
+ facecolor=facecolor,decimals=2, \
1993
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
1994
+ data_font_size=data_font_size_small)
1995
+
1996
+
1997
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,资产负债分析"
1998
+ colList=['日期','财报类别','流动比率','速动比率','现金比率(%)','利息支付倍数','长期债务与营运资金比率(%)', \
1999
+ '股东权益比率(%)','长期负债比率(%)','股东权益与固定资产比率(%)','负债与所有者权益比率(%)', \
2000
+ '长期资产与长期资金比率(%)','资本化比率(%)','固定资产净值率(%)','资本固定化比率(%)', \
2001
+ '产权比率(%)','清算价值比率(%)','固定资产比重(%)','资产负债率(%)','总资产(元)']
2002
+ dftmp=df7[colList].head(numOfQ)
2003
+ dftmp.rename(columns={'长期债务与营运资金比率(%)':'长期债务/营运资金(%)', \
2004
+ '股东权益与固定资产比率(%)':'股东权益/固定资产(%)', \
2005
+ '负债与所有者权益比率(%)':'负债/所有者权益(%)', \
2006
+ '长期资产与长期资金比率(%)':'长期资产/长期资金(%)', \
2007
+ '股东权益与固定资产比率(%)':'股东权益/固定资产(%)'},inplace=True)
2008
+
2009
+ # 去掉全列为空的字段
2010
+ dftmpCols=list(dftmp)
2011
+ for f in dftmpCols:
2012
+ fnum=len(set(dftmp[f].tolist()))
2013
+ if fnum == 1:
2014
+ del dftmp[f]
2015
+
2016
+ dftmp['总资产(亿元)']=dftmp['总资产(元)'].apply(lambda x: round(float(x)/yi,4))
2017
+ dftmp.drop(['总资产(元)'],axis=1,inplace=True)
2018
+
2019
+ # 为应对更多的字段,转置矩阵打印
2020
+ dftmp.set_index('日期',inplace=True)
2021
+ dftmp1=dftmp.T
2022
+ dftmp1.reset_index(inplace=True)
2023
+ dftmp1.rename(columns={'index':'项目'},inplace=True)
2024
+ """
2025
+ if prettytab:
2026
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
2027
+ print(' ','数据来源:新浪财经,',str(today))
2028
+ else:
2029
+ print('\n*** '+titletxt+'\n')
2030
+ colalignList=['left','right','right','right','right','right','right','right','right']
2031
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
2032
+ print('\n数据来源:新浪财经,',str(today))
2033
+ """
2034
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
2035
+ facecolor=facecolor,decimals=2, \
2036
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
2037
+ data_font_size=data_font_size_small)
2038
+
2039
+
2040
+ """
2041
+ 股东权益比率(又称自有资本比率或净资产比率)是股东权益与资产总额的比率。
2042
+ 固定资产净值率是指固定资产原价扣除其累计磨损额后的余额即固定资产折余价值对固定资产原价的比率。
2043
+ 资本固定化比率=(资产总计-流动资产合计)/所有者权益平均余额
2044
+ 固定资产比率是指固定资产与资产总额之比。
2045
+
2046
+ """
2047
+
2048
+ titletxt=ticker_name(ticker,'stock')+":主要财务信息,现金流量指标(均为%)"
2049
+ colList=['日期','财报类别','经营现金净流量对销售收入比率(%)','资产的经营现金流量回报率(%)','经营现金净流量与净利润的比率(%)', \
2050
+ '经营现金净流量对负债比率(%)','现金流量比率(%)']
2051
+ dftmp=df7[colList].head(numOfQ)
2052
+ dftmp.rename(columns={'经营现金净流量对销售收入比率(%)':'经营现金净流量/销售收入', \
2053
+ '资产的经营现金流量回报率(%)':'资产的经营现金流量回报率', \
2054
+ '经营现金净流量与净利润的比率(%)':'经营现金净流量/净利润', \
2055
+ '经营现金净流量对负债比率(%)':'经营现金净流量/负债', \
2056
+ '现金流量比率(%)':'现金流量比率'},inplace=True)
2057
+
2058
+ # 去掉全列为空的字段
2059
+ dftmpCols=list(dftmp)
2060
+ for f in dftmpCols:
2061
+ fnum=len(set(dftmp[f].tolist()))
2062
+ if fnum == 1:
2063
+ del dftmp[f]
2064
+
2065
+ # 为应对更多的字段,转置矩阵打印
2066
+ dftmp.set_index('日期',inplace=True)
2067
+ dftmp1=dftmp.T
2068
+ dftmp1.reset_index(inplace=True)
2069
+ dftmp1.rename(columns={'index':'项目'},inplace=True)
2070
+
2071
+ """
2072
+ if prettytab:
2073
+ pandas2prettytable(dftmp1,titletxt,firstColSpecial=False,leftColAlign='l',otherColAlign='r',tabborder=tabborder)
2074
+ print(' ','数据来源:新浪财经,',str(today))
2075
+ else:
2076
+ print('\n*** '+titletxt+'\n')
2077
+ colalignList=['left','right','right','right','right','right','right','right','right']
2078
+ print(dftmp1.to_markdown(tablefmt='Simple',index=False,colalign=colalignList))
2079
+ print('\n数据来源:新浪财经,',str(today))
2080
+ """
2081
+ df_display_CSS(df=dftmp1,titletxt=titletxt,footnote=footnote, \
2082
+ facecolor=facecolor,decimals=2, \
2083
+ titile_font_size=titile_font_size,heading_font_size=heading_font_size_small, \
2084
+ data_font_size=data_font_size_small)
2085
+
2086
+ """
2087
+ 资产的经营现金流量回报率是经营活动产生的现金流量净额/总资产,是体现企业收现能力的指标之一。
2088
+
2089
+ """
2090
+
2091
+ return
2092
+
2093
+
2094
+ #==============================================================================
2095
+ #==============================================================================