siat 3.10.132__py3-none-any.whl → 3.10.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. siat/__init__.py +0 -0
  2. siat/allin.py +0 -0
  3. siat/assets_liquidity.py +0 -0
  4. siat/beta_adjustment.py +0 -0
  5. siat/beta_adjustment_china.py +0 -0
  6. siat/blockchain.py +0 -0
  7. siat/bond.py +0 -0
  8. siat/bond_base.py +0 -0
  9. siat/bond_china.py +0 -0
  10. siat/bond_zh_sina.py +0 -0
  11. siat/capm_beta.py +0 -0
  12. siat/capm_beta2.py +0 -0
  13. siat/compare_cross.py +0 -0
  14. siat/copyrights.py +0 -0
  15. siat/cryptocurrency.py +0 -0
  16. siat/economy.py +0 -0
  17. siat/economy2.py +0 -0
  18. siat/esg.py +0 -0
  19. siat/event_study.py +0 -0
  20. siat/exchange_bond_china.pickle +0 -0
  21. siat/fama_french.py +0 -0
  22. siat/fin_stmt2_yahoo.py +0 -0
  23. siat/financial_base.py +0 -0
  24. siat/financial_statements.py +0 -0
  25. siat/financials.py +0 -0
  26. siat/financials2.py +0 -0
  27. siat/financials_china.py +0 -0
  28. siat/financials_china2.py +0 -0
  29. siat/fund.py +0 -0
  30. siat/fund_china.pickle +0 -0
  31. siat/fund_china.py +0 -0
  32. siat/future_china.py +0 -0
  33. siat/google_authenticator.py +0 -0
  34. siat/grafix.py +0 -0
  35. siat/holding_risk.py +0 -0
  36. siat/luchy_draw.py +0 -0
  37. siat/market_china.py +0 -0
  38. siat/markowitz.py +0 -0
  39. siat/markowitz2.py +0 -0
  40. siat/markowitz2_20250704.py +0 -0
  41. siat/markowitz2_20250705.py +0 -0
  42. siat/markowitz_simple.py +0 -0
  43. siat/ml_cases.py +0 -0
  44. siat/ml_cases_example.py +0 -0
  45. siat/option_china.py +0 -0
  46. siat/option_pricing.py +0 -0
  47. siat/other_indexes.py +0 -0
  48. siat/risk_adjusted_return.py +0 -0
  49. siat/risk_adjusted_return2.py +0 -0
  50. siat/risk_evaluation.py +0 -0
  51. siat/risk_free_rate.py +0 -0
  52. siat/sector_china.py +0 -0
  53. siat/security_price2.py +0 -0
  54. siat/security_prices.py +40 -2
  55. siat/security_trend.py +0 -0
  56. siat/security_trend2.py +0 -0
  57. siat/stock.py +0 -0
  58. siat/stock_advice_linear.py +0 -0
  59. siat/stock_base.py +0 -0
  60. siat/stock_china.py +0 -0
  61. siat/stock_info.pickle +0 -0
  62. siat/stock_prices_kneighbors.py +0 -0
  63. siat/stock_prices_linear.py +0 -0
  64. siat/stock_profile.py +0 -0
  65. siat/stock_technical.py +0 -0
  66. siat/stooq.py +0 -0
  67. siat/transaction.py +0 -0
  68. siat/translate.py +0 -0
  69. siat/valuation.py +0 -0
  70. siat/valuation_china.py +0 -0
  71. siat/var_model_validation.py +0 -0
  72. siat/yf_name.py +0 -0
  73. {siat-3.10.132.dist-info/licenses → siat-3.10.133.dist-info}/LICENSE +0 -0
  74. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/METADATA +232 -235
  75. siat-3.10.133.dist-info/RECORD +78 -0
  76. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/WHEEL +1 -1
  77. {siat-3.10.132.dist-info → siat-3.10.133.dist-info}/top_level.txt +0 -1
  78. build/lib/build/lib/siat/__init__.py +0 -75
  79. build/lib/build/lib/siat/allin.py +0 -137
  80. build/lib/build/lib/siat/assets_liquidity.py +0 -915
  81. build/lib/build/lib/siat/beta_adjustment.py +0 -1058
  82. build/lib/build/lib/siat/beta_adjustment_china.py +0 -548
  83. build/lib/build/lib/siat/blockchain.py +0 -143
  84. build/lib/build/lib/siat/bond.py +0 -2900
  85. build/lib/build/lib/siat/bond_base.py +0 -992
  86. build/lib/build/lib/siat/bond_china.py +0 -100
  87. build/lib/build/lib/siat/bond_zh_sina.py +0 -143
  88. build/lib/build/lib/siat/capm_beta.py +0 -783
  89. build/lib/build/lib/siat/capm_beta2.py +0 -887
  90. build/lib/build/lib/siat/common.py +0 -5360
  91. build/lib/build/lib/siat/compare_cross.py +0 -642
  92. build/lib/build/lib/siat/copyrights.py +0 -18
  93. build/lib/build/lib/siat/cryptocurrency.py +0 -667
  94. build/lib/build/lib/siat/economy.py +0 -1471
  95. build/lib/build/lib/siat/economy2.py +0 -1853
  96. build/lib/build/lib/siat/esg.py +0 -536
  97. build/lib/build/lib/siat/event_study.py +0 -815
  98. build/lib/build/lib/siat/fama_french.py +0 -1521
  99. build/lib/build/lib/siat/fin_stmt2_yahoo.py +0 -982
  100. build/lib/build/lib/siat/financial_base.py +0 -1160
  101. build/lib/build/lib/siat/financial_statements.py +0 -598
  102. build/lib/build/lib/siat/financials.py +0 -2339
  103. build/lib/build/lib/siat/financials2.py +0 -1278
  104. build/lib/build/lib/siat/financials_china.py +0 -4433
  105. build/lib/build/lib/siat/financials_china2.py +0 -2212
  106. build/lib/build/lib/siat/fund.py +0 -629
  107. build/lib/build/lib/siat/fund_china.py +0 -3307
  108. build/lib/build/lib/siat/future_china.py +0 -551
  109. build/lib/build/lib/siat/google_authenticator.py +0 -47
  110. build/lib/build/lib/siat/grafix.py +0 -3636
  111. build/lib/build/lib/siat/holding_risk.py +0 -867
  112. build/lib/build/lib/siat/luchy_draw.py +0 -638
  113. build/lib/build/lib/siat/market_china.py +0 -1168
  114. build/lib/build/lib/siat/markowitz.py +0 -2363
  115. build/lib/build/lib/siat/markowitz2.py +0 -3150
  116. build/lib/build/lib/siat/markowitz2_20250704.py +0 -2969
  117. build/lib/build/lib/siat/markowitz2_20250705.py +0 -3158
  118. build/lib/build/lib/siat/markowitz_simple.py +0 -373
  119. build/lib/build/lib/siat/ml_cases.py +0 -2291
  120. build/lib/build/lib/siat/ml_cases_example.py +0 -60
  121. build/lib/build/lib/siat/option_china.py +0 -3069
  122. build/lib/build/lib/siat/option_pricing.py +0 -1925
  123. build/lib/build/lib/siat/other_indexes.py +0 -409
  124. build/lib/build/lib/siat/risk_adjusted_return.py +0 -1576
  125. build/lib/build/lib/siat/risk_adjusted_return2.py +0 -1900
  126. build/lib/build/lib/siat/risk_evaluation.py +0 -2218
  127. build/lib/build/lib/siat/risk_free_rate.py +0 -351
  128. build/lib/build/lib/siat/sector_china.py +0 -4140
  129. build/lib/build/lib/siat/security_price2.py +0 -727
  130. build/lib/build/lib/siat/security_prices.py +0 -3408
  131. build/lib/build/lib/siat/security_trend.py +0 -402
  132. build/lib/build/lib/siat/security_trend2.py +0 -646
  133. build/lib/build/lib/siat/stock.py +0 -4284
  134. build/lib/build/lib/siat/stock_advice_linear.py +0 -934
  135. build/lib/build/lib/siat/stock_base.py +0 -26
  136. build/lib/build/lib/siat/stock_china.py +0 -2095
  137. build/lib/build/lib/siat/stock_prices_kneighbors.py +0 -910
  138. build/lib/build/lib/siat/stock_prices_linear.py +0 -386
  139. build/lib/build/lib/siat/stock_profile.py +0 -707
  140. build/lib/build/lib/siat/stock_technical.py +0 -3305
  141. build/lib/build/lib/siat/stooq.py +0 -74
  142. build/lib/build/lib/siat/transaction.py +0 -347
  143. build/lib/build/lib/siat/translate.py +0 -5183
  144. build/lib/build/lib/siat/valuation.py +0 -1378
  145. build/lib/build/lib/siat/valuation_china.py +0 -2076
  146. build/lib/build/lib/siat/var_model_validation.py +0 -444
  147. build/lib/build/lib/siat/yf_name.py +0 -811
  148. build/lib/siat/__init__.py +0 -75
  149. build/lib/siat/allin.py +0 -137
  150. build/lib/siat/assets_liquidity.py +0 -915
  151. build/lib/siat/beta_adjustment.py +0 -1058
  152. build/lib/siat/beta_adjustment_china.py +0 -548
  153. build/lib/siat/blockchain.py +0 -143
  154. build/lib/siat/bond.py +0 -2900
  155. build/lib/siat/bond_base.py +0 -992
  156. build/lib/siat/bond_china.py +0 -100
  157. build/lib/siat/bond_zh_sina.py +0 -143
  158. build/lib/siat/capm_beta.py +0 -783
  159. build/lib/siat/capm_beta2.py +0 -887
  160. build/lib/siat/common.py +0 -5360
  161. build/lib/siat/compare_cross.py +0 -642
  162. build/lib/siat/copyrights.py +0 -18
  163. build/lib/siat/cryptocurrency.py +0 -667
  164. build/lib/siat/economy.py +0 -1471
  165. build/lib/siat/economy2.py +0 -1853
  166. build/lib/siat/esg.py +0 -536
  167. build/lib/siat/event_study.py +0 -815
  168. build/lib/siat/fama_french.py +0 -1521
  169. build/lib/siat/fin_stmt2_yahoo.py +0 -982
  170. build/lib/siat/financial_base.py +0 -1160
  171. build/lib/siat/financial_statements.py +0 -598
  172. build/lib/siat/financials.py +0 -2339
  173. build/lib/siat/financials2.py +0 -1278
  174. build/lib/siat/financials_china.py +0 -4433
  175. build/lib/siat/financials_china2.py +0 -2212
  176. build/lib/siat/fund.py +0 -629
  177. build/lib/siat/fund_china.py +0 -3307
  178. build/lib/siat/future_china.py +0 -551
  179. build/lib/siat/google_authenticator.py +0 -47
  180. build/lib/siat/grafix.py +0 -3636
  181. build/lib/siat/holding_risk.py +0 -867
  182. build/lib/siat/luchy_draw.py +0 -638
  183. build/lib/siat/market_china.py +0 -1168
  184. build/lib/siat/markowitz.py +0 -2363
  185. build/lib/siat/markowitz2.py +0 -3150
  186. build/lib/siat/markowitz2_20250704.py +0 -2969
  187. build/lib/siat/markowitz2_20250705.py +0 -3158
  188. build/lib/siat/markowitz_simple.py +0 -373
  189. build/lib/siat/ml_cases.py +0 -2291
  190. build/lib/siat/ml_cases_example.py +0 -60
  191. build/lib/siat/option_china.py +0 -3069
  192. build/lib/siat/option_pricing.py +0 -1925
  193. build/lib/siat/other_indexes.py +0 -409
  194. build/lib/siat/risk_adjusted_return.py +0 -1576
  195. build/lib/siat/risk_adjusted_return2.py +0 -1900
  196. build/lib/siat/risk_evaluation.py +0 -2218
  197. build/lib/siat/risk_free_rate.py +0 -351
  198. build/lib/siat/sector_china.py +0 -4140
  199. build/lib/siat/security_price2.py +0 -727
  200. build/lib/siat/security_prices.py +0 -3408
  201. build/lib/siat/security_trend.py +0 -402
  202. build/lib/siat/security_trend2.py +0 -646
  203. build/lib/siat/stock.py +0 -4284
  204. build/lib/siat/stock_advice_linear.py +0 -934
  205. build/lib/siat/stock_base.py +0 -26
  206. build/lib/siat/stock_china.py +0 -2095
  207. build/lib/siat/stock_prices_kneighbors.py +0 -910
  208. build/lib/siat/stock_prices_linear.py +0 -386
  209. build/lib/siat/stock_profile.py +0 -707
  210. build/lib/siat/stock_technical.py +0 -3305
  211. build/lib/siat/stooq.py +0 -74
  212. build/lib/siat/transaction.py +0 -347
  213. build/lib/siat/translate.py +0 -5183
  214. build/lib/siat/valuation.py +0 -1378
  215. build/lib/siat/valuation_china.py +0 -2076
  216. build/lib/siat/var_model_validation.py +0 -444
  217. build/lib/siat/yf_name.py +0 -811
  218. siat-3.10.132.dist-info/RECORD +0 -218
@@ -1,910 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- @function: 预测美股股价,教学演示用,其他用途责任自负,仅适用于机器学习课堂案例演示
4
- @version:v1.4,2020.4.6
5
- @purpose: 机器学习课程案例
6
- @author: 王德宏,北京外国语大学国际商学院
7
- 特点:加入了多进程并行处理mp,在多核电脑上可以明显提速
8
- """
9
-
10
- #==============================================================================
11
- import warnings; warnings.filterwarnings('ignore')
12
- #==============================================================================
13
def get_stock_price(ticker,atdate,fromdate):
    """
    Download the closing prices of one stock through pandas_datareader's
    'yahoo' reader.

    Parameters:
        ticker: stock symbol handed to DataReader.
        atdate: end of the sample window (today or a historical date).
        fromdate: start of the sample window; choose it early enough to
            obtain as many raw observations as the later sampling needs.
    Returns:
        DataFrame with columns Date ("YYYY-MM-DD" string), Weekday
        (index.weekday+1, i.e. 1 for Monday) and Close, sorted with the most
        recent date first.  None when pandas_datareader cannot be imported
        or the download fails (a message is printed in both cases).
    """
    # Optional dependency: report and bail out instead of raising.
    # `except Exception` (not a bare except) keeps Ctrl-C working.
    try:
        from pandas_datareader import data
    except Exception:
        print(".Error(get_stock_price), pls install pandas_datareader first!")
        return None

    try:
        price=data.DataReader(ticker,'yahoo',fromdate,atdate)
    except Exception:
        print(".Error(get_stock_price), failed to capture stock prices:",ticker,fromdate,atdate)
        return None

    # Newest prices first
    sortedprice=price.sort_index(axis=0,ascending=False)

    # Derive the date string and the day of week from the index
    sortedprice['Date']=sortedprice.index.strftime("%Y-%m-%d")
    sortedprice['Weekday']=sortedprice.index.weekday+1

    # Output layout: date, day of week, closing price
    return sortedprice[['Date','Weekday','Close']]


if __name__=='__main__':
    ticker='MSFT'
    atdate='4/2/2020'
    fromdate='1/1/2015'
    dfprice=get_stock_price(ticker,atdate,fromdate)
    if dfprice is not None:
        print(dfprice.head(5))
        print(dfprice.tail(3))
        print(dfprice[dfprice.Date == '2019-03-29'])
        print(dfprice[(dfprice.Date>='2019-03-20') & (dfprice.Date<='2019-03-29')])
57
-
58
-
59
- #==============================================================================
60
def make_price_sample(dfprice,n_nextdays=1,n_samples=252,n_features=21):
    """
    Build machine-learning samples from a newest-first price table.

    Sample i uses the price at position i as its label and the n_features
    prices starting n_nextdays positions later (i.e. older prices) as its
    features.

    Parameters:
        dfprice: table with a Close column, most recent price first.
        n_nextdays: how many trading days ahead the label lies, default 1.
        n_samples: number of samples to generate (>= 1), default 252
            (average number of trading days in a year).
        n_features: number of features per sample, default 21
            (average number of trading days in a month).
    Returns:
        (X, y, ndprice) as numpy matrices: X is n_samples x n_features,
        y is n_samples x 1, ndprice is the whole Close series as a 1 x N row.
        (None, None, None) when dfprice holds fewer than
        n_features+n_nextdays+n_samples rows.
    """
    # Make sure there are enough observations
    n_req=n_features+n_nextdays+n_samples
    if len(dfprice) < n_req:
        print(".Error(make_price_sample), need more number of stock prices!")
        print("...There are only",len(dfprice),"obs in the stock price file")
        print("...But, I need at least",n_req,"obs to make ML samples")
        return None,None,None

    import numpy as np
    closeprice=dfprice.Close

    # Full price row, kept as a matrix because callers slice it as ndprice[0,a:b]
    ndprice=np.asmatrix(closeprice,dtype=None)
    prices=np.asarray(closeprice)

    # Labels: the first n_samples prices as a column
    rows=np.arange(n_samples)
    y=np.asmatrix(prices[rows]).T

    # Features: one gather with a (n_samples x n_features) index grid instead
    # of growing the matrix row by row with np.append (quadratic copying)
    window=n_nextdays+rows[:,None]+np.arange(n_features)[None,:]
    X=np.asmatrix(prices[window])

    return X,y,ndprice


if __name__=='__main__':
    dfprice=get_stock_price('LK','4/3/2020','1/1/2015')
    if dfprice is not None:
        X,y,ndprice=make_price_sample(dfprice,1,200,21)
        if X is not None:
            print(y[:5])
            print(y[2:5])       # the first row has index 0
            print(X[:5])
            print(X[:-5])
            print(X[3-1,2-1])
107
-
108
-
109
- #==============================================================================
110
def bestKN(X,y,maxk=10,random_state=0):
    """
    Find the neighbour count (1..maxk) whose distance-weighted
    KNeighborsRegressor scores highest on the test split.

    Strategy: highest test score wins, overfitting is not considered.
    Ties keep the smaller k.

    Parameters:
        X: feature matrix, one sample per row.
        y: labels, one row per sample.
        maxk: largest neighbour count to try, default 10.  It is capped at
            the number of training rows because a larger k cannot be scored.
        random_state: seed for the random train/test split.
    Returns:
        (bestmodel, bestk, bestscore_train, bestscore_test)
    """
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsRegressor

    # Current scikit-learn rejects np.matrix input; plain arrays keep the
    # same 2-D shapes, so predictions keep their (n, 1) layout.
    X=np.asarray(X)
    y=np.asarray(y)

    # Random split into training and test sets
    X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=random_state)

    # k=1 is always evaluated as the baseline, even when maxk < 1
    top_k=max(1,min(maxk,len(X_train)))

    bestmodel=None
    bestk=bestscore_train=bestscore_test=None
    for k in range(1,top_k+1):
        reg=KNeighborsRegressor(n_neighbors=k,weights='distance',n_jobs=-1)
        reg.fit(X_train,y_train)
        score_train=reg.score(X_train,y_train)
        score_test =reg.score(X_test,y_test)

        if bestmodel is None or score_test > bestscore_test:
            bestk=k
            bestscore_train=score_train
            bestscore_test =score_test
            bestmodel=reg

    return bestmodel,bestk,bestscore_train,bestscore_test


if __name__=='__main__':
    dfprice=get_stock_price('MSFT','4/3/2019','1/1/2015')
    if dfprice is not None:
        X,y,ndprice=make_price_sample(dfprice,1,240,20)
        bestmodel,bestk,bestscore_train,bestscore_test=bestKN(X,y)
        print(bestk,bestscore_train,bestscore_test)
147
-
148
-
149
- #==============================================================================
150
def bestFN(dfprice,n_nextdays=1,n_samples=252,maxFN=252,random_state=0):
    """
    Search for the feature count (1..maxFN, default 252) whose best k-NN
    model scores highest on the test split.

    Strategy: highest test score wins, overfitting is not considered.
    The search stops early once dfprice is too short for the next feature
    count.

    Parameters:
        dfprice: newest-first price table accepted by make_price_sample.
        n_nextdays, n_samples: forwarded to make_price_sample.
        maxFN: largest feature count to try.
        random_state: seed forwarded to bestKN.
    Returns:
        (bestmodel, bestf, bestk, bestscore_train, bestscore_test)
    Raises:
        ValueError: dfprice is too short even for a single feature.
    """
    # Starting point: one feature
    bestf=1
    X,y,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf)
    if X is None:
        # make_price_sample already printed the details; fail clearly here
        # rather than inside scikit-learn with a None input.
        raise ValueError("bestFN: not enough stock prices to build samples")

    # Best neighbour count for this feature count
    bestmodel,bestk,bestscore_train,bestscore_test=bestKN(X,y,random_state=random_state)

    # Step by which the feature count grows
    n_step=1
    for f in range(2,maxFN+1,n_step):
        if len(dfprice) < (n_nextdays+n_samples+f): break
        X,y,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,f)
        model,k,score_train,score_test=bestKN(X,y,random_state=random_state)

        if score_test > bestscore_test:
            bestf=f
            bestk=k
            bestscore_train=score_train
            bestscore_test =score_test
            bestmodel=model

    # Model with the best test score, its feature count, neighbour count, scores
    return bestmodel,bestf,bestk,bestscore_train,bestscore_test


if __name__=='__main__':
    dfprice=get_stock_price('MSFT','4/4/2020','1/1/2015')
    if dfprice is not None:
        bestmodel,bestf,bestk,bestscore_train,bestscore_test= \
            bestFN(dfprice,1,252)

        print("best f=",bestf,",best k=",bestk, \
              "\nbest score on train=",bestscore_train, \
              "\nbest score on test=",bestscore_test)
186
-
187
-
188
- #==============================================================================
189
def bestKN2(X,y,maxk=10,random_state=0):
    """
    Find the neighbour count (1..maxk) whose train and test scores are
    closest to each other.

    Strategy 2: minimise |train_score/test_score - 1|, hoping to limit both
    overfitting and underfitting.  Ties keep the smaller k.

    Parameters:
        X: feature matrix, one sample per row.
        y: labels, one row per sample.
        maxk: largest neighbour count to try, default 10 (capped at the
            number of training rows).
        random_state: seed for the random train/test split.
    Returns:
        (bestmodel, bestk, bestscore_train, bestscore_test, bestrate)
    """
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsRegressor

    def _gap(score_train,score_test):
        # A test score of exactly 0 would divide by zero; rank it worst.
        if score_test == 0:
            return np.inf
        return np.abs(score_train / score_test -1)

    # Current scikit-learn rejects np.matrix input; shapes stay 2-D.
    X=np.asarray(X)
    y=np.asarray(y)

    # Random split into training and test sets
    X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=random_state)

    # k=1 is always evaluated as the baseline, even when maxk < 1
    top_k=max(1,min(maxk,len(X_train)))

    bestmodel=None
    bestk=bestscore_train=bestscore_test=bestrate=None
    for k in range(1,top_k+1):
        reg=KNeighborsRegressor(n_neighbors=k,weights='distance',n_jobs=-1)
        reg.fit(X_train,y_train)
        score_train=reg.score(X_train,y_train)
        score_test =reg.score(X_test,y_test)
        rate=_gap(score_train,score_test)

        if bestmodel is None or rate < bestrate:
            bestk=k
            bestrate=rate
            bestscore_train=score_train
            bestscore_test =score_test
            bestmodel=reg

    return bestmodel,bestk,bestscore_train,bestscore_test,bestrate


if __name__=='__main__':
    dfprice=get_stock_price('MSFT','3/27/2019','1/1/2015')
    if dfprice is not None:
        X,y,ndprice=make_price_sample(dfprice,1,252,21)

        bestmodel,bestk,bestscore_train,bestscore_test=bestKN(X,y)
        print("best k=",bestk,"\nbest score on train=",bestscore_train, \
              ",best score on test=",bestscore_test)

        bestmodel,bestk,bestscore_train,bestscore_test,bestrate=bestKN2(X,y)
        print("best k=",bestk,"\nbest score on train=",bestscore_train, \
              ",best score on test=",bestscore_test)
236
-
237
-
238
-
239
- #==============================================================================
240
def bestFN2(dfprice,n_nextdays=1,n_samples=252,maxFN=252,random_state=0):
    """
    Search for the feature count (1..maxFN, default 252) whose best k-NN
    model has the closest train and test scores.

    Strategy 2: minimise the rate returned by bestKN2, hoping to limit both
    overfitting and underfitting.  The search stops early once dfprice is
    too short for the next feature count.

    Parameters:
        dfprice: newest-first price table accepted by make_price_sample.
        n_nextdays, n_samples: forwarded to make_price_sample.
        maxFN: largest feature count to try.
        random_state: seed forwarded to bestKN2.
    Returns:
        (bestmodel, bestf, bestk, bestscore_train, bestscore_test, bestrate)
    Raises:
        ValueError: dfprice is too short even for a single feature.
    """
    # Starting point: one feature
    bestf=1
    X,y,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf)
    if X is None:
        # make_price_sample already printed the details; fail clearly here
        # rather than inside scikit-learn with a None input.
        raise ValueError("bestFN2: not enough stock prices to build samples")

    # Best neighbour count for this feature count
    bestmodel,bestk,bestscore_train,bestscore_test,bestrate=bestKN2(X,y,random_state=random_state)

    # Step by which the feature count grows
    n_step=1
    for f in range(2,maxFN+1,n_step):
        if len(dfprice) < (n_nextdays+n_samples+f): break
        X,y,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,f)
        model,k,score_train,score_test,rate=bestKN2(X,y,random_state=random_state)

        if rate < bestrate:
            bestf=f
            bestk=k
            bestscore_train=score_train
            bestscore_test =score_test
            bestrate=rate
            bestmodel=model

    # Selected model, its feature count, neighbour count, scores and rate
    return bestmodel,bestf,bestk,bestscore_train,bestscore_test,bestrate


if __name__=='__main__':
    dfprice=get_stock_price('MSFT','3/27/2019','1/1/2015')
    if dfprice is not None:
        bestmodel,bestf,bestk,bestscore_train,bestscore_test= \
            bestFN(dfprice,1,252)
        print("best f=",bestf,",best k=",bestk, \
              "\nbest score on train=",bestscore_train, \
              "\nbest score on test=",bestscore_test)

        # bestFN2 takes the price table (not the price matrix) and returns
        # six values, the last being the train/test gap.
        bestmodel,bestf,bestk,bestscore_train,bestscore_test,bestrate= \
            bestFN2(dfprice,1,252)
        print("best f=",bestf,",best k=",bestk, \
              "\nbest score on train=",bestscore_train, \
              "\nbest score on test=",bestscore_test)
282
-
283
- #==============================================================================
284
def isdate(adate):
    """
    Check whether a value can be parsed as a date.

    Parameters:
        adate: the date to check, e.g. a "YYYY-MM-DD" string.
    Returns:
        True when pandas.to_datetime accepts the value, False when it raises.
    """
    import pandas as pd
    # `except Exception` instead of a bare except so that KeyboardInterrupt
    # and SystemExit are not reported as "invalid date".
    try:
        pd.to_datetime(adate)
    except Exception:
        return False
    return True
297
-
298
- #==============================================================================
299
def date_adjust(basedate, adjust=0):
    """
    Shift a date by a number of calendar days.

    Parameters:
        basedate: the base date, anything pandas.to_datetime can parse.
        adjust: days to shift; negative moves to an earlier date, positive
            to a later one.
    Returns:
        The shifted date as a "YYYY-MM-DD" string, or None (after printing
        an error message) when basedate cannot be parsed.
    """
    # Validate the base date
    import pandas as pd
    try:
        bd=pd.to_datetime(basedate)
    except Exception:
        print("*** 错误#1(date_adjust),无效的日期:",basedate)
        return None

    # Shift, then keep only the date part
    from datetime import timedelta
    nd = bd+timedelta(days=adjust)
    return str(nd.date())


if __name__ =="__main__":
    basedate='2020-3-17'
    adjust=-365
    newdate = date_adjust(basedate, adjust)
    print(newdate)
328
-
329
- #==============================================================================
330
def forecast_stock_price(ticker,atdate,n_nextdays,n_samples=252, \
                         maxk=20,maxFN=252,random_state=0,printout=True):
    """
    Predict the closing price n_nextdays trading days after atdate, running
    both the bestFN and the bestFN2 search and keeping the candidate with the
    higher test score (bestFN2 wins ties).

    Parameters:
        ticker: stock symbol.
        atdate: reference date, today or earlier.
        n_nextdays: how many trading days ahead to predict.
        n_samples: number of samples used for learning.
        maxk: accepted for compatibility; bestFN/bestFN2 take no neighbour
            limit, so the bestKN/bestKN2 default applies.
        maxFN: largest feature count to try.
        random_state: seed for the train/test splits of both searches.
        printout: print the chosen result when True.
    Returns:
        (predicted_y, bestscore_train, bestscore_test, bestfeature, bestk),
        or None when the date is invalid or not enough prices are available.
    """
    import numpy as np

    # Validate the date
    if not isdate(atdate):
        print(".Error(forecast_stock_price), invalid date:",atdate)
        return None

    print("... Predicting stock price, it may take long time, please wait ......")

    # Start far enough back to cover samples + features + horizon
    nyears=int((n_nextdays + n_samples + maxFN + 1)/252)+2
    start=date_adjust(atdate,-366*nyears)

    # Download the prices
    dfprice=get_stock_price(ticker,atdate,start)
    if dfprice is None:
        print(".Error(forecast_stock_price), failed to capture stock prices:",ticker)
        return None
    if len(dfprice) < (n_nextdays + n_samples + maxFN + 1):
        print(".Error(forecast_stock_price), insufficient number of stock prices!")
        return None

    # Search 1: best feature count by test score alone
    bestmodel1,bestf1,bestk1,bestscore_train1,bestscore_test1= \
        bestFN(dfprice,n_nextdays,n_samples,maxFN=maxFN,random_state=random_state)
    _,_,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf1)

    # Predict from the newest bestf1 prices; a plain 2-D array is passed
    # because current scikit-learn rejects np.matrix input.
    X_new1=np.asarray(ndprice[0,0:bestf1])
    y_new1=bestmodel1.predict(X_new1)

    # Search 2: best feature count by train/test closeness, with the same
    # maxFN and seed as search 1 so the two candidates are comparable.
    bestmodel2,bestf2,bestk2,bestscore_train2,bestscore_test2,bestrate2= \
        bestFN2(dfprice,n_nextdays,n_samples,maxFN=maxFN,random_state=random_state)
    _,_,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf2)
    X_new2=np.asarray(ndprice[0,0:bestf2])
    y_new2=bestmodel2.predict(X_new2)

    # Final choice: the higher test score wins
    if bestscore_test1 <= bestscore_test2:
        predicted_y=np.ravel(y_new2)[0]
        bestscore_train=bestscore_train2
        bestscore_test=bestscore_test2
        bestfeature=bestf2
        bestk=bestk2
    else:
        predicted_y=np.ravel(y_new1)[0]
        bestscore_train=bestscore_train1
        bestscore_test=bestscore_test1
        bestfeature=bestf1
        bestk=bestk1

    if printout:
        print("    Forecasted price:%10.2f" % predicted_y)
        print("    Best score on train:",round(bestscore_train,4))
        print("    Best score on test:",round(bestscore_test,4))
        print("    Best number of features:",bestfeature)
        print("    Best number of neighbors:",bestk)

    return predicted_y,bestscore_train,bestscore_test,bestfeature,bestk


if __name__ =="__main__":
    ticker='MSFT'
    atdate="2020-4-2"
    n_nextdays=1
    info=forecast_stock_price(ticker,atdate,n_nextdays)
    print(info)
402
-
403
- #==============================================================================
404
def forecast_stock_price2(dfprice,n_nextdays,n_samples=252, \
                          maxk=20,maxFN=252,random_state=0):
    """
    Predict the closing price n_nextdays trading days ahead from an already
    downloaded price table, for a single random seed.  Runs both the bestFN
    and the bestFN2 search and keeps the candidate with the higher test score
    (bestFN2 wins ties).

    Parameters:
        dfprice: newest-first price table accepted by make_price_sample.
        n_nextdays: how many trading days ahead to predict.
        n_samples: number of samples used for learning.
        maxk: accepted for compatibility; bestFN/bestFN2 take no neighbour
            limit, so the bestKN/bestKN2 default applies.
        maxFN: largest feature count to try.
        random_state: seed for the train/test splits of both searches.
    Returns:
        (predicted price rounded to 2 places, train score rounded to 4,
        test score rounded to 4, feature count, neighbour count)
    """
    import numpy as np

    # Search 1: best feature count by test score alone
    bestmodel1,bestf1,bestk1,bestscore_train1,bestscore_test1= \
        bestFN(dfprice,n_nextdays,n_samples,maxFN=maxFN,random_state=random_state)
    _,_,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf1)

    # Predict from the newest bestf1 prices; a plain 2-D array is passed
    # because current scikit-learn rejects np.matrix input.
    X_new1=np.asarray(ndprice[0,0:bestf1])
    y_new1=bestmodel1.predict(X_new1)

    # Search 2: best feature count by train/test closeness.  The seed must be
    # forwarded here too, otherwise a seed sweep over this function would
    # repeat the identical search 2 for every seed.
    bestmodel2,bestf2,bestk2,bestscore_train2,bestscore_test2,bestrate2= \
        bestFN2(dfprice,n_nextdays,n_samples,maxFN=maxFN,random_state=random_state)
    _,_,ndprice=make_price_sample(dfprice,n_nextdays,n_samples,bestf2)
    X_new2=np.asarray(ndprice[0,0:bestf2])
    y_new2=bestmodel2.predict(X_new2)

    # Final choice: the higher test score wins
    if bestscore_test1 <= bestscore_test2:
        predicted_y=np.ravel(y_new2)[0]
        bestscore_train=bestscore_train2
        bestscore_test=bestscore_test2
        bestfeature=bestf2
        bestk=bestk2
    else:
        predicted_y=np.ravel(y_new1)[0]
        bestscore_train=bestscore_train1
        bestscore_test=bestscore_test1
        bestfeature=bestf1
        bestk=bestk1

    return round(predicted_y,2),round(bestscore_train,4), \
           round(bestscore_test,4),bestfeature,bestk


if __name__ =="__main__":
    ticker='MSFT'
    atdate="2020-4-2"
    n_nextdays=1
    dfprice=get_stock_price('MSFT','4/2/2020','1/1/2015')
    if dfprice is not None:
        info=forecast_stock_price2(dfprice,n_nextdays)
        print(info)
451
-
452
- #==============================================================================
453
def weighted_median(df,colname,colweight):
    """
    Weighted median of one column of a table.

    Parameters:
        df: the data table.
        colname: name of the column whose median is wanted.
        colweight: name of the column holding the weights.
    Returns:
        None for an empty table; the single value for one row; the weighted
        mean of the two values for two rows; otherwise the 50% weighted
        quantile computed by statsmodels' DescrStatsW.
    """
    n_rows=len(df)
    if n_rows == 0:
        return None

    # Positional access (.iloc): the table may carry any index, e.g. after
    # sorting or filtering, so label lookups like [0] are not safe.
    values=df[colname]
    weights=df[colweight]
    if n_rows == 1:
        return values.iloc[0]
    if n_rows == 2:
        return (values.iloc[0]*weights.iloc[0]+values.iloc[1]*weights.iloc[1]) \
               /(weights.iloc[0]+weights.iloc[1])

    # statsmodels is only needed (and only imported) for three or more rows
    from statsmodels.stats.weightstats import DescrStatsW
    wdf = DescrStatsW(values, weights=weights, ddof=1)
    return list(wdf.quantile([0.50]))[0]


if __name__ =="__main__":
    import pandas as pd
    df=pd.DataFrame({ 'x':range(1,3), 'wt':range(1,3) })
    colname='x'
    colweight='wt'
    print(weighted_median(df,colname,colweight))
480
-
481
- #==============================================================================
482
def second2time(seconds):
    """
    Format a duration in seconds as an "hours:minutes:seconds" string.

    The seconds part is rounded to 1 decimal for durations of 10 s or more,
    2 decimals from 0.1 s up, and 4 decimals below that.  Parts are not
    zero-padded.
    """
    hours=int(seconds/3600)
    minutes=int((seconds-hours*3600)/60)

    # Shorter durations get more decimals
    if seconds >= 10:
        digits=1
    elif seconds >= 0.1:
        digits=2
    else:
        digits=4

    leftover=round(seconds-hours*3600-minutes*60,digits)
    return ":".join(str(part) for part in (hours,minutes,leftover))


if __name__ =="__main__":
    for sample in (590.58963,65.456321,35.75698,5.75698,0.75698,0.00098):
        second2time(sample)
511
- #==============================================================================
512
def save_to_excel(df,excelfile="myfile01.xlsx",sheetname="Sheet1"):
    """
    Save a DataFrame into a sheet of an Excel file.

    - If the file cannot be opened (typically because it does not exist) it
      is created with df in sheetname.
    - If the file exists but has no such sheet, the sheet is added and the
      other sheets are written back.
    - If the file and the sheet both exist, df is appended below the rows
      already in that sheet (only df's columns are kept there).

    Parameters:
        df: the DataFrame to save.
        excelfile: Excel file name, default "myfile01.xlsx".
        sheetname: sheet name, default "Sheet1".
    Returns:
        None.  A confirmation or an error message is printed.
    Note:
        Numbers stored as text in df are converted to numbers by Excel, so
        data read back may differ in type from df.
    """
    import pandas as pd

    # Column names of df
    dflist=list(df)
    filename=excelfile

    try:
        book=pd.ExcelFile(excelfile)
    except Exception:
        # No readable file: write df directly.  (`encoding=` is not passed;
        # pandas 2.0 removed that argument from to_excel.)
        df.to_excel(filename,sheet_name=sheetname,header=True)
        print("*** Results saved in",filename,"@ sheet",sheetname)
        return

    # Read every existing sheet, releasing the file handle even on failure
    try:
        sheets=pd.read_excel(book,sheet_name=None)
    finally:
        book.close()

    # Sheets were written with index=True, so reading them back yields a
    # leading "Unnamed: 0" column; drop it so it does not pile up on every
    # rewrite.  All other columns of other sheets are preserved.
    # NOTE(review): this also drops a genuinely blank-headed first column in
    # a sheet that was not produced by this function.
    for name in list(sheets):
        sheets[name]=sheets[name].drop(columns="Unnamed: 0",errors="ignore")

    if sheetname in sheets:
        # Existing sheet: append df below the rows already stored.
        # Text-encoded numbers in df may need a type cast beforehand.
        sheets[sheetname]=pd.concat([sheets[sheetname][dflist],df], \
                                    axis=0,ignore_index=True)
    else:
        # New sheet: written after the existing ones
        sheets[sheetname]=df

    # The context manager saves and closes the workbook (ExcelWriter.save()
    # no longer exists in pandas 2.0).
    try:
        with pd.ExcelWriter(filename) as writer:
            for name,frame in sheets.items():
                frame.to_excel(writer,sheet_name=name,header=True,index=True)
    except Exception:
        print("... Error(save_to_excel): writing file failed",filename,"@ sheet",sheetname)
        print("Information:",filename)
        return
    print("*** Results saved in",filename,"@ sheet",sheetname)

    return
584
-
585
-
586
- #==============================================================================
587
- def forecast_stock_price_rs(ticker,atdate,n_nextdays=1,n_samples=252, \
588
- maxk=20,maxFN=252,random_state=0,maxRS=9, \
589
- excelfile="myfile01.xlsx",sheetname="Sheet1"):
590
- """
591
- 功能:预测未来第几天的股票收盘价,试验随机数种子策略
592
- 输入参数:
593
- 1、ticker: 股票代码
594
- 2、atdate: 当前日期,可以是今天或以前的一天
595
- 3、n_nextdays: 以atdate为基准向前推进几个交易日,预测该日期的股价
596
- 4、n_samples: 生成机器学习用的样本中的最大观察数目。
597
- 跨年的样本有助于模型学习季节性效应,3年的样本效果好于2年,
598
- 2年的样本效果好于1年
599
- 5、maxk:试探的最大邻居个数
600
- 6、maxFN:试探的最大特征个数
601
- 7、random_state: 开始试探时的随机数种子
602
- 8、maxRS: 用于试探的最大的随机数种子
603
- 9、excelfile:保存文件的名字
604
- 10、sheetname:Excel文件的sheet名字
605
- 输出:每次迭代取得更好的测试集分数时,输出模型参数和预测的股价
606
- 返回:最优测试集的模型参数及预测的股价,以及各个迭代最优结果下预测的股价的
607
- 加权中位数,权重为各个测试集分数。
608
- """
609
- #检查日期的合理性
610
- if not isdate(atdate):
611
- print(".Error(forecast_stock_price_rs), invalid date:",atdate)
612
- return None
613
-
614
- #开始计时
615
- print("\n... Predicting stock price, it may take very long time, please wait ......")
616
- import time
617
- time0 = time.perf_counter()
618
-
619
- #设定起始日期:
620
- nyears=int((n_nextdays + n_samples + maxFN + 1)/252)+2
621
- start=date_adjust(atdate,-366*nyears)
622
-
623
- #抓取股价数据
624
- dfprice=get_stock_price(ticker,atdate,start)
625
- if dfprice is None:
626
- print(".Error(forecast_stock_price_rs), failed to capture stock prices:",ticker)
627
- return None
628
- if len(dfprice) < (n_nextdays + n_samples + maxFN + 1):
629
- print(".Error(forecast_stock_price_rs), insufficient number of stock prices!")
630
- return None
631
-
632
- #设置测试集分数起点
633
- bestscore_test=0.0
634
- #建立结果表结构
635
- import pandas as pd
636
- result=pd.DataFrame(columns=('ticker','atdate','n_nextdays','n_samples', \
637
- 'random_state','pred_y','bestscore_train', \
638
- 'bestscore_test','bestfeature','bestk'))
639
- #倒序随机数种子,便于尽快看到最优结果
640
- rslist=list(range(random_state,maxRS+1))
641
- rslist.reverse()
642
- #开始逐一试探各个随机数种子的最佳分数
643
- for rs in rslist:
644
- print("... Testing random seed:",rs)
645
- pred_y0,bestscore_train0,bestscore_test0,bestfeature0,bestk0= \
646
- forecast_stock_price2(dfprice,n_nextdays=n_nextdays, \
647
- n_samples=n_samples,maxk=maxk, \
648
- maxFN=maxFN,random_state=rs)
649
-
650
- #记录中间结果
651
- row=pd.Series({'ticker':ticker,'atdate':atdate,'n_nextdays':n_nextdays, \
652
- 'n_samples':n_samples,'random_state':rs,'pred_y':pred_y0, \
653
- 'bestscore_train':bestscore_train0,'bestscore_test':bestscore_test0, \
654
- 'bestfeature':bestfeature0,'bestk':bestk0})
655
- result=result.append(row,ignore_index=True)
656
-
657
- #更新最佳纪录
658
- if bestscore_test < bestscore_test0:
659
- pred_y=pred_y0
660
- bestscore_train=bestscore_train0
661
- bestscore_test=bestscore_test0
662
- bestfeature=bestfeature0
663
- bestk=bestk0
664
-
665
- print(" Predicted stock price :",pred_y)
666
- print(" Best score on train :",bestscore_train)
667
- print(" Best score on test :",bestscore_test)
668
- print(" Best number of features :",bestfeature)
669
- print(" Best number of neighbors:",bestk,"\n")
670
-
671
- #再度显示中间结果
672
- pd.set_option('display.unicode.ambiguous_as_wide', True)
673
- pd.set_option('display.unicode.east_asian_width', True)
674
- pd.set_option('display.width', 180) # 设置打印宽度(**重要**)
675
- print("... Summary:")
676
- print(result.to_string(index=False))
677
- print("\n... Result by highest score on test:",result['pred_y'][-1].values[0])
678
-
679
- #计算运行时间
680
- time1 = time.perf_counter()
681
- elapsed=time1 - time0
682
- print("... Total elapsed time is",second2time(elapsed))
683
-
684
- save_to_excel(result,excelfile,sheetname)
685
- print("... Results saved in an Excel file:",excelfile,"@sheet",sheetname)
686
-
687
- return result
688
-
689
if __name__ == "__main__":
    # Manual smoke run of forecast_stock_price_rs with a single extra seed.
    # The names stay at module level, as in the original cell.
    ticker, atdate = 'MSFT', "2020-4-5"
    n_nextdays = maxRS = 1
    info = forecast_stock_price_rs(ticker, atdate, n_nextdays, maxRS=maxRS)
    print(info.to_string(index=False))
696
-
697
- #==============================================================================
698
def multisummary(result, notes='', top=5):
    """
    Function: print a summary of the per-seed forecasts and compute the
    weighted median of the predicted prices.

    Input parameters:
    1. result: DataFrame holding the best forecast of each random seed. This
       function reads the columns 'pred_y', 'bestscore_test' and 'bestfeature'.
    2. notes: label printed after "... Summary:". When it equals 'final' the
       two headline prices are printed as well.
    3. top: how many of the rows with the highest test score take part in the
       weighted median. It is capped at the number of rows available.

    Output: the table sorted by test score (ascending) and, for the final
    summary, the two predicted prices.

    Return: tuple (hsotest, pred_y_wmedian), both rounded to 2 decimals, where
    hsotest is the predicted price of the row with the highest test score and
    pred_y_wmedian is the value returned by weighted_median() for 'pred_y'
    weighted by 'bestscore_test'. Returns None when result is None or empty.

    Note: the caller's DataFrame is not reordered; sorting is done on a copy.
    """
    import pandas as pd

    # Nothing to summarise (None is treated the same way as an empty table)
    if result is None or len(result) == 0:
        print("... Error(multisummary), No data recieved!")
        return None

    # Sort ascending so that the best rows end up at the tail. A sorted copy is
    # used because callers may pass a slice of a larger frame, and sorting that
    # in place would either warn or silently reorder the caller's data.
    ranked = result.sort_values(by=["bestscore_test", "bestfeature"],
                                ascending=[True, True])

    # Weighted median over the best `top` rows.
    # weighted_median() is a helper defined elsewhere in this module.
    top = min(top, len(ranked))
    topdata = ranked.tail(top)
    pred_y_wmedian = round(weighted_median(topdata, 'pred_y', 'bestscore_test'), 2)

    # Display settings so that wide tables print without being wrapped
    pd.set_option('display.unicode.ambiguous_as_wide', True)
    pd.set_option('display.unicode.east_asian_width', True)
    pd.set_option('display.width', 180)

    print("\n... Summary:", notes)
    print(ranked.to_string(index=False))

    # Last row after the ascending sort = highest score on the test set
    hsotest = round(ranked.tail(1)['pred_y'].values[0], 2)
    if notes == 'final':
        print("\n... Predicted price by highest score on test:", hsotest)
        print("... Predicted in median weighted by score on test:", pred_y_wmedian)

    return hsotest, pred_y_wmedian
735
-
736
if __name__ == "__main__":
    # Manual check of multisummary on a previously computed result table.
    wmprice = multisummary(result, top=5)
738
-
739
- #==============================================================================
740
def forecast_stock_price3(dfprice, n_nextdays=1, n_samples=252*3,
                          maxk=20, maxFN=252*3, random_state=0):
    """
    Function: forecast the closing price a number of trading days ahead using
    one single random seed. Suitable for running as a separate process.

    Input parameters:
    1. dfprice: the stock price data set that has already been downloaded
    2. n_nextdays: number of trading days ahead of the observation date whose
       price is to be predicted
    3. n_samples: maximum number of observations in the machine-learning
       sample. Samples spanning several years help the model learn seasonal
       effects: 3 years works better than 2, and 2 better than 1.
    4. maxk: largest number of neighbours to try
    5. maxFN: largest number of features to try
    6. random_state: the random seed

    Output: start and end messages carrying the name of the current process.
    Return: a pandas Series with the keys random_state, pred_y,
    bestscore_train, bestscore_test, bestfeature and bestk.
    """
    import multiprocessing as mp
    import pandas as pd

    # Announce which worker process handles this seed
    worker = mp.current_process().name
    print("... Starting sub-process", worker, "with random_state", random_state)

    # Search for the best score under this single seed
    # (forecast_stock_price2 is defined elsewhere in this module)
    predicted, score_train, score_test, n_features, n_neighbors = \
        forecast_stock_price2(dfprice,
                              n_nextdays=n_nextdays,
                              n_samples=n_samples,
                              maxk=maxk,
                              maxFN=maxFN,
                              random_state=random_state)

    # Pack the outcome into one record for the parent process
    record = pd.Series(dict(random_state=random_state,
                            pred_y=predicted,
                            bestscore_train=score_train,
                            bestscore_test=score_test,
                            bestfeature=n_features,
                            bestk=n_neighbors))

    print("... Endting sub-process", worker)
    return record
774
-
775
if __name__ == "__main__":
    # Manual single-seed run; dfprice must already exist in the session.
    ticker, atdate = 'MSFT', "2020-4-5"
    n_nextdays, random_state = 1, 0
    info = forecast_stock_price3(dfprice, n_nextdays, random_state=random_state)
    print(info)
782
-
783
- #==============================================================================
784
def forecast_stock_price_mp(ticker, atdate, n_nextdays=1, n_samples=252*3,
                            maxk=20, maxFN=252*3, random_state=0, maxRS=9, top=5):
    """
    Function: forecast the closing price a number of trading days ahead with a
    knn model, trying a range of random seeds in parallel worker processes.

    Input parameters:
    1. ticker: stock code
    2. atdate: observation date, today or an earlier day
    3. n_nextdays: number of trading days after atdate whose price is predicted
    4. n_samples: maximum number of observations in the machine-learning
       sample. Samples spanning several years help the model learn seasonal
       effects: 3 years works better than 2, and 2 better than 1.
    5. maxk: largest number of neighbours to try
    6. maxFN: largest number of features to try
    7. random_state: first random seed to try
    8. maxRS: last random seed to try (inclusive)
    9. top: number of best results used for the final weighted median

    Output: progress messages, one summary per process pool and a final summary.
    Return: tuple (hsotest, wmprice) as produced by multisummary() on all
    results, i.e. the price predicted by the seed with the highest test score
    and the weighted median of the best predictions. Returns None when the
    date is invalid, the seed range is empty, prices cannot be downloaded or
    there are too few of them.
    """
    import os
    import time
    import multiprocessing as mp
    import pandas as pd

    # Validate the date
    if not isdate(atdate):
        print(".Error(forecast_stock_price_mp), invalid date:", atdate)
        return None

    # Seeds are tried in descending order so the highest seeds finish first.
    rslist = list(range(random_state, maxRS+1)); rslist.reverse()
    jobnum = len(rslist)
    if jobnum == 0:
        # Without this guard the final summary would have nothing to unpack
        print(".Error(forecast_stock_price_mp), no random seed to test:",
              random_state, "-", maxRS)
        return None

    # Opening information
    print("\n... Predicting stock price by knn model ......")
    print(" Stock:", ticker)
    print(" Observation date:", atdate)
    print(" Number of trading day(s) being predicted:", n_nextdays)
    print(" Max number of historical prices used:", n_samples)
    print(" Max number of features used in knn:", maxFN)
    print(" Max number of neighbors used in knn:", maxk)
    print(" Max number of obs used in weighted meadian:", top)
    print(" WARNING: It may take long time, please wait ......")
    # Start the overall timer
    time0 = time.perf_counter()

    print("\n... Capturing historical stock prices ......", end='')
    # Start date: enough calendar years to cover the trading days required
    required = n_nextdays + n_samples + maxFN + 1
    nyears = int(required/252)+1
    start = date_adjust(atdate, -366*nyears)
    # Download the prices
    dfprice = get_stock_price(ticker, atdate, start)
    if dfprice is None:
        print("\n Error(forecast_stock_price_mp), failed to capture stock prices:", ticker)
        return None
    if len(dfprice) < required:
        print("\n Error(forecast_stock_price_mp), insufficient number of stock prices!")
        return None
    print(", done!")
    print(" ", len(dfprice), "historical stock prices captured")

    print("... Start machine-learning using knn model in multiprocessing ......")

    # Number of CPU cores; os.cpu_count() may return None when undetermined
    cores = os.cpu_count() or 1
    print(" There are", cores, "core(s) inside the cpu of this computer")
    # Pool size: one extra process on small machines
    procnum = cores+1 if cores <= 4 else cores
    # Number of pools needed to work through all seeds (ceiling division)
    groupnum = (jobnum + procnum - 1) // procnum

    # Column layout of the records returned by forecast_stock_price3
    columns = ['random_state', 'pred_y', 'bestscore_train',
               'bestscore_test', 'bestfeature', 'bestk']
    allrows = []

    # Run the seeds pool by pool
    for g in range(groupnum):
        grpstart = g*procnum
        # The actual seed values handled by this pool (not their positions)
        seeds = rslist[grpstart:grpstart+procnum]

        timep0 = time.perf_counter()
        # The context manager releases the workers even if a task raises
        with mp.Pool(processes=procnum) as pool:
            print("\n... Pool", g, "created with max capacity of", procnum, "processes in parallel")
            mptasks = [pool.apply_async(forecast_stock_price3,
                                        args=(dfprice, n_nextdays, n_samples,
                                              maxk, maxFN, rs,))
                       for rs in seeds]
            pool.close()
            pool.join()
            # get() re-raises any exception that occurred in a worker
            grouprows = [task.get() for task in mptasks]

        # Keep the records and summarise this pool on its own
        allrows.extend(grouprows)
        print(" Completed processes for random_state", seeds)
        multisummary(pd.DataFrame(grouprows, columns=columns),
                     notes="Pool "+str(g), top=top)
        # Elapsed time of this pool
        elapsedp = time.perf_counter() - timep0
        print(" Elapsed time in Pool", g, "is", second2time(elapsedp))

    # Assemble and sort the overall result. Rows are collected in a list
    # because DataFrame.append is no longer available in current pandas.
    result = pd.DataFrame(allrows, columns=columns)
    result = result.sort_values(by=['bestscore_test'], ascending=True)

    # Final summary
    summary = multisummary(result, 'final', top)

    # Total elapsed time
    elapsed = time.perf_counter() - time0
    print("\n... Total elapsed time is", second2time(elapsed))

    if summary is None:
        return None
    hsotest, wmprice = summary
    return hsotest, wmprice
897
-
898
if __name__ == "__main__":
    # Manual multi-process run over seeds minRS..maxRS.
    ticker, atdate, n_nextdays = 'MSFT', "2020-4-5", 1
    minRS, maxRS = 0, 2
    predicted_prices = forecast_stock_price_mp(
        ticker, atdate, n_nextdays, random_state=minRS, maxRS=maxRS)
906
-
907
- #==============================================================================
908
-
909
- #==============================================================================
910
-