siat 2.0.19__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siat/grafix.py +12 -2
- siat/stock.py +35 -9
- siat/stock_china.py +525 -300
- siat/stock_china_test.py +6 -6
- siat/translate.py +2 -2
- {siat-2.0.19.dist-info → siat-2.1.5.dist-info}/METADATA +1 -1
- {siat-2.0.19.dist-info → siat-2.1.5.dist-info}/RECORD +9 -9
- {siat-2.0.19.dist-info → siat-2.1.5.dist-info}/WHEEL +0 -0
- {siat-2.0.19.dist-info → siat-2.1.5.dist-info}/top_level.txt +0 -0
siat/stock_china.py
CHANGED
@@ -20,54 +20,88 @@ from siat.security_prices import *
|
|
20
20
|
#==============================================================================
|
21
21
|
|
22
22
|
if __name__=='__main__':
|
23
|
-
ticker='600519'
|
23
|
+
ticker='600519.Ss'
|
24
24
|
|
25
|
-
|
25
|
+
dfp=get_money_flowin(ticker)
|
26
|
+
|
27
|
+
end='2023-6-15'
|
28
|
+
dfp=get_money_flowin(ticker,end)
|
29
|
+
|
30
|
+
def get_money_flowin(ticker,end='latest'):
|
26
31
|
"""
|
27
32
|
功能:抓取个股近一百个交易日的资金净流入情况,以及大盘指数的情况
|
28
|
-
ticker
|
33
|
+
ticker:个股代码,带后缀
|
29
34
|
标准化方法:原始数据
|
35
|
+
注意:目前仅支持沪深股市
|
30
36
|
"""
|
37
|
+
ticker1=ticker.upper()[:6]
|
38
|
+
exch=ticker.upper()[7:9]
|
39
|
+
|
31
40
|
import akshare as ak
|
32
41
|
import pandas as pd
|
33
42
|
|
34
43
|
#判断沪深市场
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
44
|
+
if exch=='':
|
45
|
+
l1=ticker[0]; market='sh'
|
46
|
+
if l1 in ['0','2','3']: market='sz'
|
47
|
+
#深市股票以0/2/3开头,沪市以6/9开头
|
48
|
+
else:
|
49
|
+
if exch=='SS':
|
50
|
+
market='sh'
|
51
|
+
elif exch=='SZ':
|
52
|
+
market='sz'
|
53
|
+
else:
|
54
|
+
print(" #Warning(get_money_flowin): currently only support Shanghai/Shenzhen stock exchanges")
|
55
|
+
return None
|
56
|
+
|
39
57
|
#获得个股资金流动明细
|
40
58
|
try:
|
41
|
-
df = ak.stock_individual_fund_flow(stock=
|
59
|
+
df = ak.stock_individual_fund_flow(stock=ticker1, market=market)
|
42
60
|
except:
|
43
|
-
print("#Error(
|
44
|
-
return
|
61
|
+
print(" #Error(get_money_flowin): money flow info unavailable for",ticker)
|
62
|
+
return None
|
45
63
|
|
46
64
|
df['ticker']=ticker
|
65
|
+
|
66
|
+
df['Date']=df['日期'].apply(lambda x: pd.to_datetime(x)) #不带时区的日期
|
67
|
+
df.set_index('Date',inplace=True)
|
47
68
|
df['date']=df['日期']
|
69
|
+
|
70
|
+
df['Close']=df['收盘价'].apply(lambda x: float(x))
|
71
|
+
df['Change%']=df['涨跌幅'].apply(lambda x: float(x))
|
72
|
+
|
48
73
|
#类型转换
|
49
74
|
df['netFlowInAmount_main']=df['主力净流入-净额'].apply(lambda x: float(x))
|
50
75
|
df['netFlowInAmount_small']=df['小单净流入-净额'].apply(lambda x: float(x))
|
51
76
|
df['netFlowInAmount_mid']=df['中单净流入-净额'].apply(lambda x: float(x))
|
52
77
|
df['netFlowInAmount_big']=df['大单净流入-净额'].apply(lambda x: float(x))
|
53
78
|
df['netFlowInAmount_super']=df['超大单净流入-净额'].apply(lambda x: float(x))
|
79
|
+
|
80
|
+
# 总净流入金额:可正可负
|
54
81
|
df['netFlowInAmount']=df['netFlowInAmount_main']+df['netFlowInAmount_small']+ \
|
55
|
-
|
82
|
+
df['netFlowInAmount_mid']+df['netFlowInAmount_big']+ \
|
83
|
+
df['netFlowInAmount_super']
|
56
84
|
|
57
85
|
df['netFlowInRatio%_main']=df['主力净流入-净占比'].apply(lambda x: float(x))
|
58
86
|
df['netFlowInRatio%_small']=df['小单净流入-净占比'].apply(lambda x: float(x))
|
59
87
|
df['netFlowInRatio%_mid']=df['中单净流入-净占比'].apply(lambda x: float(x))
|
60
88
|
df['netFlowInRatio%_big']=df['大单净流入-净占比'].apply(lambda x: float(x))
|
61
89
|
df['netFlowInRatio%_super']=df['超大单净流入-净占比'].apply(lambda x: float(x))
|
62
|
-
|
63
|
-
#重要:删除有缺失值的记录,确保未收盘时能预测当天的收盘价涨跌方向
|
64
|
-
df.dropna(inplace=True)
|
65
|
-
|
66
|
-
df['Close']=df['收盘价'].apply(lambda x: float(x))
|
67
|
-
df['Change%']=df['涨跌幅'].apply(lambda x: float(x))
|
68
90
|
|
69
|
-
|
70
|
-
df.
|
91
|
+
#重要:处理有缺失值的记录,确保未收盘时能预测当天的收盘价涨跌方向
|
92
|
+
#df.dropna(inplace=True)
|
93
|
+
df.fillna(0,inplace=True)
|
94
|
+
|
95
|
+
# 对数量级巨大的金额项进行对数处理,避免其对数量级小的项形成数量级压制
|
96
|
+
amtColList=['netFlowInAmount_main','netFlowInAmount_small','netFlowInAmount_mid', \
|
97
|
+
'netFlowInAmount_big','netFlowInAmount_super','netFlowInAmount']
|
98
|
+
import math
|
99
|
+
for a in amtColList:
|
100
|
+
# 取对数
|
101
|
+
df[a+'_ln']=df[a].apply(lambda x: math.log(x) if x>0 else -math.log(-x) if x<0 else 0)
|
102
|
+
|
103
|
+
# 删除原有金额项目
|
104
|
+
df.drop(amtColList,axis=1,inplace=True)
|
71
105
|
|
72
106
|
#去掉不用的字段
|
73
107
|
dfdroplist=['主力净流入-净额','小单净流入-净额','中单净流入-净额','大单净流入-净额', \
|
@@ -76,36 +110,47 @@ def get_money_flowin(ticker):
|
|
76
110
|
'收盘价','涨跌幅','日期']
|
77
111
|
df.drop(labels=dfdroplist,axis=1,inplace=True)
|
78
112
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
113
|
+
df.sort_index(ascending=True,inplace=True)
|
114
|
+
fromdate=df['date'].values[0]
|
115
|
+
todate=df['date'].values[-1]
|
116
|
+
|
117
|
+
#获得大盘指数和交易量
|
118
|
+
dpindex="000001.SS" #上证综合指数
|
119
|
+
if market == 'sz': dpindex="399001.SZ" #深圳成分指数
|
120
|
+
df_dp_tmp=get_price(dpindex,fromdate,todate)
|
121
|
+
|
122
|
+
df_dp_tmp['Volume_mkt']=df_dp_tmp['Volume'].apply(lambda x: math.log(x))
|
123
|
+
df_dp_tmp['Close_mkt']=df_dp_tmp['Close']
|
124
|
+
df_dp=df_dp_tmp[['Volume_mkt','Close_mkt']]
|
125
|
+
|
126
|
+
# 获得股票交易量
|
127
|
+
df_stk_tmp=get_price(ticker,fromdate,todate)
|
128
|
+
df_stk_tmp['Volume_ln']=df_stk_tmp['Volume'].apply(lambda x: math.log(x))
|
129
|
+
df_stk=df_stk_tmp[['Open','High','Low','Adj Close','Volume_ln']]
|
130
|
+
|
131
|
+
# 合并
|
132
|
+
df_stk_mkt=pd.merge(df_stk,df_dp,how='left',left_index=True,right_index=True)
|
133
|
+
|
134
|
+
dfp_tmp=pd.merge(df,df_stk_mkt,how='left',left_index=True,right_index=True)
|
135
|
+
newColList=['date','Close','Change%','Adj Close','Open','High','Low','Volume_ln', \
|
136
|
+
'netFlowInRatio%_main','netFlowInRatio%_small','netFlowInRatio%_mid', \
|
137
|
+
'netFlowInRatio%_big','netFlowInRatio%_super', \
|
138
|
+
'netFlowInAmount_main_ln','netFlowInAmount_small_ln','netFlowInAmount_mid_ln', \
|
139
|
+
'netFlowInAmount_big_ln','netFlowInAmount_super_ln', 'netFlowInAmount_ln', \
|
140
|
+
'Close_mkt','Volume_mkt']
|
141
|
+
dfp=dfp_tmp[newColList]
|
142
|
+
|
143
|
+
if end=='latest':
|
144
|
+
dfp2=dfp
|
145
|
+
else:
|
146
|
+
jieguo,end2=check_date2(end)
|
147
|
+
if not jieguo:
|
148
|
+
dfp2=dfp
|
149
|
+
else:
|
150
|
+
dfp2=dfp[dfp['date'] <= end2]
|
104
151
|
|
105
|
-
return
|
152
|
+
return dfp2
|
106
153
|
|
107
|
-
if __name__=='__main__':
|
108
|
-
dfp=get_money_flowin('600519')
|
109
154
|
|
110
155
|
#==============================================================================
|
111
156
|
# 对特征数据进行预处理
|
@@ -169,9 +214,13 @@ if __name__=='__main__':
|
|
169
214
|
#==============================================================================
|
170
215
|
if __name__=='__main__':
|
171
216
|
ndays=1
|
172
|
-
preCumTimes=
|
217
|
+
preCumTimes=5
|
218
|
+
|
219
|
+
dfp=get_money_flowin('600519.SS')
|
220
|
+
|
221
|
+
X,ydf,X_new=make_sample(dfp,ndays=1)
|
173
222
|
|
174
|
-
def make_sample(dfp,ndays=1,preCumTimes=
|
223
|
+
def make_sample(dfp,ndays=1,preCumTimes=5):
|
175
224
|
"""
|
176
225
|
功能:构造适合机器学习的样本
|
177
226
|
ndays:预测未来几个交易日
|
@@ -183,55 +232,68 @@ def make_sample(dfp,ndays=1,preCumTimes=1):
|
|
183
232
|
preDays=ndays * preCumTimes
|
184
233
|
|
185
234
|
#构造过去一段时间资金净流入累加值
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
235
|
+
amtColList=[]
|
236
|
+
colList=list(dfp)
|
237
|
+
for c in colList:
|
238
|
+
if 'Amount' in c:
|
239
|
+
amtColList=amtColList+[c]
|
240
|
+
|
241
|
+
for c in amtColList:
|
242
|
+
dfp[c+'_cum']=dfp[c].rolling(window=preDays,min_periods=1).sum()
|
192
243
|
|
193
244
|
#构造过去一段时间资金净流入比例均值
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
245
|
+
ratioColList=[]
|
246
|
+
for c in colList:
|
247
|
+
if 'Ratio' in c:
|
248
|
+
ratioColList=ratioColList+[c]
|
249
|
+
|
250
|
+
for c in ratioColList:
|
251
|
+
dfp[c+'_avg']=dfp[c].rolling(window=preDays,min_periods=1).mean()
|
252
|
+
|
200
253
|
#构造过去一段时间大盘指数的均值和标准差
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
254
|
+
mktColList=['Close_mkt','Volume_mkt']
|
255
|
+
for c in mktColList:
|
256
|
+
dfp[c+'_avg']=dfp[c].rolling(window=preDays,min_periods=1).mean()
|
257
|
+
dfp[c+'_std']=dfp[c].rolling(window=preDays,min_periods=1).std()
|
205
258
|
|
206
259
|
#重要:去掉前几行,此处位置敏感
|
207
|
-
dfp.dropna(
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
260
|
+
dfp1=dfp.dropna()
|
261
|
+
if len(dfp1)==0:
|
262
|
+
print(" #Error(make_sample): dropna caused empty dataframe, process stopped")
|
263
|
+
return None,None
|
264
|
+
|
265
|
+
#按照ndays滚动
|
266
|
+
newColList=list(dfp1)
|
267
|
+
newColList.remove('date')
|
268
|
+
dfp2=dfp1[newColList]
|
269
|
+
for c in newColList:
|
270
|
+
dfp2[c+'_roll']=dfp2[c].rolling(window=ndays,min_periods=1).mean()
|
271
|
+
|
272
|
+
newColList2=list(dfp2)
|
273
|
+
newColList3=[]
|
274
|
+
for c in newColList2:
|
275
|
+
if '_roll' in c:
|
276
|
+
newColList3=newColList3+[c]
|
277
|
+
newColList3=newColList3+['Close','Change%']
|
278
|
+
dfp3=dfp2[newColList3]
|
279
|
+
|
280
|
+
#添加未来一个单位(ndays)的股价信息
|
281
|
+
dfp3['Close_next']=dfp3['Close'].shift(-1)
|
282
|
+
dfp3['Change%_next']=dfp3['Change%'].shift(-1)
|
283
|
+
ylist=['Close_next','Change%_next']
|
284
|
+
|
285
|
+
#供训练用的数据集
|
286
|
+
XCollist=list(dfp3)
|
287
|
+
for c in ylist:
|
288
|
+
XCollist.remove(c)
|
289
|
+
dfp4=dfp3.dropna()
|
290
|
+
X = dfp4[XCollist]
|
291
|
+
ydf = dfp4[ylist]
|
292
|
+
|
293
|
+
# X_new
|
294
|
+
X_new = dfp3.tail(1)[XCollist]
|
295
|
+
|
296
|
+
return X,ydf,X_new
|
235
297
|
|
236
298
|
#==============================================================================
|
237
299
|
# 训练模型,获得最优模型参数,进行预测
|
@@ -254,6 +316,8 @@ def train_predict_knn(X,ydf,noday=1,y='Close', \
|
|
254
316
|
ydf:标签矩阵
|
255
317
|
nodays:预测未来第几天
|
256
318
|
y:标签,默认'Close'为股价,'Change%'为涨跌幅,'Direction'为涨跌方向
|
319
|
+
|
320
|
+
未用!
|
257
321
|
"""
|
258
322
|
ylist=['Close','Change%','Direction']
|
259
323
|
if not (y in ylist):
|
@@ -332,6 +396,8 @@ def train_predict_knn(X,ydf,noday=1,y='Close', \
|
|
332
396
|
if votes > len(r2): votes=len(r2)
|
333
397
|
r2head=r2.head(votes)
|
334
398
|
|
399
|
+
# 未完
|
400
|
+
|
335
401
|
#==============================================================================
|
336
402
|
# 训练,获得最优模型参数
|
337
403
|
#==============================================================================
|
@@ -347,6 +413,8 @@ def training_knn_clf(scaler_X,ydf,ndays=1,max_neighbors=10,max_p=6,cv=5,rs=0):
|
|
347
413
|
功能:对(X,y)
|
348
414
|
scaler_X: 特征矩阵
|
349
415
|
y:标签矩阵
|
416
|
+
|
417
|
+
未用!
|
350
418
|
'''
|
351
419
|
|
352
420
|
#获得分类变量y
|
@@ -397,71 +465,61 @@ def training_knn_clf(scaler_X,ydf,ndays=1,max_neighbors=10,max_p=6,cv=5,rs=0):
|
|
397
465
|
# Forecasting stock price directions by money flow in/out, using knn
|
398
466
|
#==============================================================================
|
399
467
|
if __name__=='__main__':
|
400
|
-
ticker='600519'
|
468
|
+
ticker='600519.SS'
|
469
|
+
ticker='000723.SZ'
|
470
|
+
|
401
471
|
ndays=1
|
402
|
-
|
403
|
-
diff=0.
|
472
|
+
min_score=0.9
|
473
|
+
diff=0.01
|
404
474
|
votes=100
|
405
475
|
max_neighbours=10
|
406
476
|
max_RS=10
|
477
|
+
printout=True
|
478
|
+
|
479
|
+
df=get_money_flowin(ticker,end='2023-6-15')
|
480
|
+
decision,known_close,known_day=price_direction_knn(ticker,df,ndays=1)
|
407
481
|
|
408
|
-
def price_direction_knn(ticker,df,ndays=1,diff=0.
|
482
|
+
def price_direction_knn(ticker,df,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=10,printout=True):
|
409
483
|
|
410
484
|
"""
|
411
485
|
功能:基于个股资金流动预测次日股票涨跌方向,涨或跌
|
412
486
|
ticker:股票代码,无后缀
|
413
487
|
df:个股资金净流入
|
414
|
-
dp:大盘信息
|
415
488
|
ndays:预测几天后的股价涨跌方向,默认1天
|
416
|
-
|
417
|
-
|
418
|
-
votes
|
489
|
+
diff:泛化精度,越小越好
|
490
|
+
min_score:最小测试分数
|
491
|
+
votes:软表决票数
|
419
492
|
max_neighbours:最大邻居个数
|
420
493
|
max_RS:最大随机数种子
|
421
494
|
"""
|
422
495
|
import pandas as pd
|
423
496
|
|
424
|
-
|
425
|
-
df['
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
'netFlowInChg_super','netFlowInChg','netFlowInRatio%Chg_main','netFlowInRatio%Chg_small', \
|
449
|
-
'netFlowInRatio%Chg_mid','netFlowInRatio%Chg_big','netFlowInRatio%Chg_super', \
|
450
|
-
'Close','Change%','dpCloseChg','dpVolumeChg','nextClose','nextChange%','nextDirection']]
|
451
|
-
|
452
|
-
#记录最新指标,用于预测次日涨跌
|
453
|
-
x_last=df2.copy().tail(1)
|
454
|
-
today=x_last['date'].values[0]
|
455
|
-
today_close=x_last['Close'].values[0]
|
456
|
-
x_last.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1,inplace=True)
|
457
|
-
X_new = x_last.head(1).values
|
458
|
-
|
459
|
-
#建立样本:特征序列
|
460
|
-
df2.dropna(inplace=True)
|
461
|
-
X=df2.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1)
|
462
|
-
|
497
|
+
# 判断是否周末无交易免预测
|
498
|
+
known_day=df.tail(1)['date'].values[0]
|
499
|
+
|
500
|
+
tdate=date_adjust(known_day, adjust=ndays)
|
501
|
+
tdate_pd=pd.to_datetime(tdate)
|
502
|
+
tdate_wd=tdate_pd.weekday()
|
503
|
+
if tdate_wd in [5,6]:
|
504
|
+
print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
|
505
|
+
decision='~'
|
506
|
+
known_close=df.tail(1)['Close'].values[0]
|
507
|
+
|
508
|
+
return decision,known_close,known_day
|
509
|
+
|
510
|
+
#构造样本:标签ydf,特征矩阵X,预测X_new
|
511
|
+
X,ydf,X_new=make_sample(df,ndays=ndays)
|
512
|
+
|
513
|
+
#X_new1=X_new.head(1).values
|
514
|
+
known_day_np=X_new.index.values[0]
|
515
|
+
known_day_pd=pd.to_datetime(known_day_np)
|
516
|
+
known_day=known_day_pd.strftime("%Y-%m-%d")
|
517
|
+
known_close=X_new['Close'].values[0]
|
518
|
+
|
519
|
+
ydf['nextDirection']=ydf['Change%_next'].apply(lambda x: 1 if x>0 else -1 if x <0 else 0)
|
520
|
+
|
463
521
|
#建立样本:标签序列
|
464
|
-
y1=
|
522
|
+
y1=ydf['nextDirection'] #二分类
|
465
523
|
#y2=df2['nextChange%'] #回归
|
466
524
|
#y3=df2['nextClose'] #回归
|
467
525
|
|
@@ -480,17 +538,18 @@ def price_direction_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
|
|
480
538
|
mlist4=['cityblock','euclidean','minkowski','cosine']
|
481
539
|
mlist=mlist1+mlist2+mlist3+mlist4
|
482
540
|
rslist=list(range(0,max_RS+1))
|
541
|
+
|
483
542
|
results=pd.DataFrame(columns=('spread','train_score','test_score', \
|
484
543
|
'neighbours','weight','metric','random','pred'))
|
485
544
|
print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
|
486
|
-
print(' Progress: 0
|
545
|
+
print(' Progress: 0% ',end='')
|
487
546
|
for n in nlist:
|
488
547
|
for w in wlist:
|
489
548
|
for m in mlist:
|
490
549
|
for rs in rslist:
|
491
550
|
knn1=KNeighborsClassifier(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
|
492
551
|
X_train,X_test,y_train,y_test=train_test_split(X,y1,random_state=rs)
|
493
|
-
knn1.fit(X_train, y_train)
|
552
|
+
tmp=knn1.fit(X_train, y_train) #调试时去掉tmp=可跟踪模型参数
|
494
553
|
train_score=round(knn1.score(X_train, y_train),3)
|
495
554
|
test_score=round(knn1.score(X_test, y_test),3)
|
496
555
|
prediction=knn1.predict(X_new)[0]
|
@@ -500,65 +559,145 @@ def price_direction_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
|
|
500
559
|
'test_score':test_score,'neighbours':n, \
|
501
560
|
'weight':w,'metric':m,'random':rs,'pred':prediction})
|
502
561
|
results=results.append(row,ignore_index=True)
|
503
|
-
print(int(n/n_num*100),'\b
|
562
|
+
print(int(n/n_num*100),'\b% ',end='')
|
504
563
|
print('done.')
|
505
564
|
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
565
|
+
|
566
|
+
# 过滤训练/测试分数差的结果
|
567
|
+
r0=results[(results['train_score']>=min_score) & (results['test_score']>=min_score)]
|
568
|
+
# 避免min_score设置得太高导致无结果,或者数量太多
|
569
|
+
r0_len=len(r0)
|
570
|
+
#if r0_len==0 or r0_len > votes:
|
571
|
+
if r0_len==0:
|
572
|
+
train_score_max=results['train_score'].max()
|
573
|
+
test_score_max=results['test_score'].max()
|
574
|
+
min_score_new=min(train_score_max,test_score_max)
|
575
|
+
r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
|
576
|
+
|
577
|
+
if len(r0)==0:
|
578
|
+
spread_quantile=results['spread'].quantile(q=0.01)
|
579
|
+
r0_spread=results[(results['spread']<spread_quantile)]
|
580
|
+
if len(r0_spread)==0:
|
581
|
+
r0_spread=results[(results['spread']<=spread_quantile)]
|
582
|
+
|
583
|
+
test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
|
584
|
+
r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
|
585
|
+
if len(r0)==0:
|
586
|
+
r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
|
587
|
+
|
588
|
+
train_score_max=r0['train_score'].max()
|
589
|
+
test_score_max=r0['test_score'].max()
|
590
|
+
min_score_new=min(train_score_max,test_score_max)
|
591
|
+
|
592
|
+
print(" #Warning: filtering parameter min_score has been adjusted to",min_score_new)
|
593
|
+
|
511
594
|
#去掉泛化效果差的结果
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
r2head=r2.head(votes)
|
595
|
+
r1=r0[r0['spread'] <= diff] #限定泛化差距
|
596
|
+
r1_len=len(r1)
|
597
|
+
#if r1_len==0 or r1_len>=votes:
|
598
|
+
if r1_len==0:
|
599
|
+
diff_new=r0['spread'].min()
|
600
|
+
r1=r0[r0['spread'] <= diff_new]
|
601
|
+
print(" #Warning: filtering parameter diff has been adjusted to",diff_new)
|
520
602
|
|
521
|
-
zhang=len(r2head[r2head['pred']==1])
|
522
|
-
die=len(r2head[r2head['pred']==-1])
|
523
603
|
|
524
|
-
|
525
|
-
|
526
|
-
|
604
|
+
"""
|
605
|
+
# 首选测试效果好的
|
606
|
+
r0=results[(results['test_score']>=min_score)]
|
607
|
+
if len(r0)==0:
|
608
|
+
test_quantile=results['test_score'].quantile(q=0.99,interpolation='nearest')
|
609
|
+
r0=results[(results['test_score']>test_quantile)]
|
610
|
+
if len(r0)==0:
|
611
|
+
r0=results[(results['test_score']>=test_quantile)]
|
612
|
+
|
613
|
+
print(" #Warning: filtering parameter min_score for testing has been adjusted to",test_quantile)
|
614
|
+
"""
|
615
|
+
|
616
|
+
"""
|
617
|
+
# 首选训练效果好的
|
618
|
+
r0=results[(results['train_score']>=min_score) & (results['train_score']<1.0)]
|
619
|
+
if len(r0)==0:
|
620
|
+
r0_train=results[(results['train_score']>=results['test_score']) & results['train_score']<1.0]
|
621
|
+
train_quantile=r0_train['train_score'].quantile(q=0.99,interpolation='nearest')
|
622
|
+
r0=r0_train[(r0_train['train_score']>train_quantile)]
|
623
|
+
if len(r0)==0:
|
624
|
+
r0=r0_train[(r0_train['train_score']>=train_quantile)]
|
625
|
+
|
626
|
+
print(" #Warning: filtering parameter min_score for training has been adjusted to",train_quantile)
|
627
|
+
|
628
|
+
# 再选泛化效果好的
|
629
|
+
r1=r0[(r0['spread']<=diff)]
|
630
|
+
if len(r1)==0:
|
631
|
+
spread_quantile=r0['spread'].quantile(q=0.05,interpolation='nearest')
|
632
|
+
r1=r0[(r0['spread']<spread_quantile)]
|
633
|
+
if len(r1)==0:
|
634
|
+
r1=r0[(r0['spread']<=spread_quantile)]
|
635
|
+
|
636
|
+
print(" #Warning: filtering parameter diff has been adjusted to",spread_quantile)
|
637
|
+
"""
|
527
638
|
|
528
|
-
|
529
|
-
|
530
|
-
|
639
|
+
#优先查看泛化效果最优的结果:spread最小
|
640
|
+
votes=min(votes,len(r1))
|
641
|
+
r2=r1.sort_values(by=['spread','test_score'],ascending=[True,False]).head(votes)
|
642
|
+
#优先查看测试分数最高的结果:test_score最大
|
643
|
+
r3=r2.sort_values(by=['test_score','spread'],ascending=[False,True])
|
644
|
+
|
645
|
+
r3head=r3
|
531
646
|
|
532
|
-
|
647
|
+
zhang=len(r3head[r3head['pred']==1])
|
648
|
+
die=len(r3head[r3head['pred']==-1])
|
649
|
+
|
650
|
+
decision='+'
|
651
|
+
decision_text='HIGHER'
|
652
|
+
if die != 0:
|
653
|
+
if zhang >= die * 2.0: decision='2+'
|
654
|
+
if zhang >= die * 3.0: decision='3+'
|
655
|
+
|
656
|
+
if die > zhang:
|
657
|
+
decision='-'
|
658
|
+
decision_text='LOWER'
|
659
|
+
if zhang != 0:
|
660
|
+
if die >= zhang * 2.0: decision='2-'
|
661
|
+
if die >= zhang * 3.0: decision='3-'
|
662
|
+
|
663
|
+
#if abs(zhang-die)/((zhang+die)/2) < 0.05: decision='?'
|
664
|
+
if zhang==die:
|
665
|
+
decision='='
|
666
|
+
decision_text='FLAT'
|
533
667
|
|
534
668
|
if not printout: return decision,today_close,today
|
535
669
|
|
536
|
-
print(" Model
|
537
|
-
print("
|
538
|
-
|
539
|
-
|
670
|
+
print(" Model voting for stock price after "+str(ndays)+" trading days: Higher("+str(zhang)+'), Lower('+str(die)+')')
|
671
|
+
print(" "+codetranslate(ticker)+': previously closed '+str(known_close)+' @ '+str(known_day))
|
672
|
+
|
673
|
+
ndate=date_adjust(known_day,adjust=ndays)
|
674
|
+
print(" Prediction to close at",decision_text,"price trend ("+decision+') @',ndate)
|
675
|
+
|
676
|
+
return decision,known_close,known_day
|
540
677
|
|
541
|
-
if __name__=='__main__':
|
542
|
-
df=price_direction_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
|
543
678
|
|
544
679
|
#==============================================================================
|
545
680
|
if __name__=='__main__':
|
546
|
-
ticker='600519'
|
681
|
+
ticker='600519.SS'
|
547
682
|
ndays=1
|
548
|
-
|
549
|
-
|
683
|
+
diff=0.01
|
684
|
+
min_score=0.9
|
550
685
|
votes=100
|
551
686
|
max_neighbours=3
|
552
|
-
max_RS=
|
687
|
+
max_RS=20
|
688
|
+
preproctype='0-1'
|
689
|
+
|
690
|
+
df=forecast_direction_knn(ticker,ndays=1)
|
691
|
+
df=forecast_direction_knn(ticker,ndays=3)
|
553
692
|
|
554
|
-
def forecast_direction_knn(ticker,ndays=1,diff=0.
|
693
|
+
def forecast_direction_knn(ticker,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=20,preproctype='0-1',end='latest'):
|
555
694
|
|
556
695
|
"""
|
557
696
|
功能:基于个股资金流动预测未来股票涨跌方向,涨或跌
|
558
697
|
ticker:股票代码,无后缀
|
559
698
|
ndays:预测几天后的股价涨跌方向,默认1天
|
560
|
-
|
561
|
-
|
699
|
+
diff:泛化精度,越小越好
|
700
|
+
min_score:最小训练/学习分数
|
562
701
|
votes:软表决票数,默认最大100
|
563
702
|
max_neighbours:最大邻居个数,默认10个
|
564
703
|
max_RS:最大随机数种子,默认最大为10
|
@@ -566,96 +705,98 @@ def forecast_direction_knn(ticker,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
|
|
566
705
|
print("\nStart forecasting, it may take great time, please wait ...")
|
567
706
|
|
568
707
|
#抓取个股资金净流入情况df和大盘指数情况dp
|
569
|
-
|
570
|
-
scaler_X=preproc(X,preproctype=preproctype)
|
571
|
-
|
572
|
-
#测试用
|
573
|
-
df=df0.copy()
|
708
|
+
df=get_money_flowin(ticker,end=end)
|
574
709
|
|
575
710
|
#预测未来股价涨跌
|
576
711
|
decisionlist=[]
|
577
712
|
for nd in list(range(1,ndays+1)):
|
578
|
-
decision,
|
579
|
-
|
713
|
+
decision,known_close,known_day=price_direction_knn(ticker,df,ndays=nd, \
|
714
|
+
diff=diff, \
|
715
|
+
min_score=min_score, \
|
716
|
+
votes=votes, \
|
717
|
+
max_neighbours=max_neighbours, \
|
718
|
+
max_RS=max_RS)
|
580
719
|
decisionlist=decisionlist+[decision]
|
581
720
|
|
582
|
-
print("\nStock information:",ticker,
|
583
|
-
|
584
|
-
|
585
|
-
print(
|
586
|
-
|
587
|
-
|
588
|
-
|
721
|
+
print("\nStock information:",codetranslate(ticker),'\b, closed',known_close,'@',known_day)
|
722
|
+
for nd in list(range(1,ndays+1)):
|
723
|
+
ndate=date_adjust(known_day, adjust=nd)
|
724
|
+
print("Forecasting stock close price trend on",ndate,end=': ')
|
725
|
+
pred=decisionlist[nd-1]
|
726
|
+
if '-' in pred:
|
727
|
+
print("LOWER,",pred)
|
728
|
+
if '+' in pred:
|
729
|
+
print("HIGHER,",pred)
|
730
|
+
if '~' in pred:
|
731
|
+
print("market closed")
|
732
|
+
if '=' in pred:
|
733
|
+
print("least different")
|
734
|
+
|
735
|
+
return decisionlist,known_close,known_day
|
589
736
|
|
590
|
-
if __name__=='__main__':
|
591
|
-
df=forecast_direction_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
|
592
737
|
|
593
738
|
#==============================================================================
|
594
739
|
# Forecasting stock prices by money flow in/out, using knn
|
595
740
|
#==============================================================================
|
596
741
|
|
597
742
|
if __name__=='__main__':
|
598
|
-
ticker='600519'
|
743
|
+
ticker='600519.SS'
|
744
|
+
ticker='605011.SS'
|
745
|
+
|
599
746
|
ndays=1
|
600
|
-
|
601
|
-
|
747
|
+
diff=0.01
|
748
|
+
min_score=0.8
|
602
749
|
votes=100
|
603
750
|
max_neighbours=10
|
604
751
|
max_RS=10
|
752
|
+
printout=True
|
753
|
+
|
754
|
+
df=get_money_flowin(ticker,end='2023-6-15')
|
755
|
+
pred_result=price_price_knn(ticker,df,ndays=1)
|
756
|
+
pred_result=price_price_knn(ticker,df,ndays=3)
|
605
757
|
|
606
|
-
def price_price_knn(ticker,df,ndays=1,diff=0.
|
607
|
-
|
758
|
+
#def price_price_knn(ticker,df,ndays=1,diff=0.01,min_score=0.8,votes=100,max_neighbours=10,max_RS=20,printout=True):
|
759
|
+
def price_price_knn(ticker,df,ndays=1,max_neighbours=10,max_RS=20,printout=True):
|
760
|
+
|
608
761
|
"""
|
609
762
|
功能:基于个股资金流动预测次日股票价格
|
610
763
|
ticker:股票代码,无后缀
|
611
764
|
df:个股资金净流入信息
|
612
|
-
dp:大盘信息
|
613
765
|
ndays:预测几天后的股价涨跌方向,默认1天
|
614
|
-
|
615
|
-
|
616
|
-
votes
|
766
|
+
diff:弃用。泛化精度,越小越好
|
767
|
+
min_score:弃用。最小训练/测试分数,越大越好
|
768
|
+
votes:弃用。软表决均值,默认100
|
617
769
|
max_neighbours:最大邻居个数
|
618
770
|
max_RS:最大随机数种子
|
619
771
|
"""
|
620
772
|
import pandas as pd
|
773
|
+
|
774
|
+
# 判断是否周末无交易免预测
|
775
|
+
known_day=df.tail(1)['date'].values[0]
|
776
|
+
|
777
|
+
tdate=date_adjust(known_day, adjust=ndays)
|
778
|
+
tdate_pd=pd.to_datetime(tdate)
|
779
|
+
tdate_wd=tdate_pd.weekday()
|
780
|
+
if tdate_wd in [5,6]:
|
781
|
+
print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
|
782
|
+
decision=0
|
783
|
+
decision_score=0
|
784
|
+
known_close=df.tail(1)['Close'].values[0]
|
785
|
+
|
786
|
+
return decision,decision_score,known_close,known_day
|
787
|
+
|
788
|
+
#构造样本:标签ydf,特征矩阵X,预测X_new
|
789
|
+
X,ydf,X_new=make_sample(df,ndays=ndays)
|
621
790
|
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
#构造特征
|
628
|
-
df['netFlowInChg_main']=df['netFlowInAmount_main']/(df['netFlowInAmount_main'].shift(ndays))
|
629
|
-
df['netFlowInChg_small']=df['netFlowInAmount_small']/(df['netFlowInAmount_small'].shift(ndays))
|
630
|
-
df['netFlowInChg_mid']=df['netFlowInAmount_mid']/(df['netFlowInAmount_mid'].shift(ndays))
|
631
|
-
df['netFlowInChg_big']=df['netFlowInAmount_big']/(df['netFlowInAmount_big'].shift(ndays))
|
632
|
-
df['netFlowInChg_super']=df['netFlowInAmount_super']/(df['netFlowInAmount_super'].shift(ndays))
|
633
|
-
df['netFlowInChg']=df['netFlowInAmount']/(df['netFlowInAmount'].shift(ndays))
|
634
|
-
|
635
|
-
df['dpCloseChg']=df['dpClose']/(df['dpClose'].shift(ndays))
|
636
|
-
df['dpVolumeChg']=df['dpVolume']/(df['dpVolume'].shift(ndays))
|
637
|
-
|
638
|
-
df2=df[['date','netFlowInChg_main',
|
639
|
-
'netFlowInChg_small','netFlowInChg_mid','netFlowInChg_big', \
|
640
|
-
'netFlowInChg_super','netFlowInChg','netFlowInRatio%_main','netFlowInRatio%_small', \
|
641
|
-
'netFlowInRatio%_mid','netFlowInRatio%_big','netFlowInRatio%_super', \
|
642
|
-
'Close','Change%','dpCloseChg','dpVolumeChg','nextClose','nextChange%','nextDirection']]
|
643
|
-
|
644
|
-
#记录最新指标,用于预测次日涨跌
|
645
|
-
x_last=df2.copy().tail(1)
|
646
|
-
today=x_last['date'].values[0]
|
647
|
-
today_close=x_last['Close'].values[0]
|
648
|
-
x_last.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1,inplace=True)
|
649
|
-
X_new = x_last.head(1).values
|
650
|
-
|
651
|
-
#建立样本:特征序列
|
652
|
-
df2.dropna(inplace=True)
|
653
|
-
X=df2.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1)
|
791
|
+
known_day_np=X_new.index.values[0]
|
792
|
+
known_day_pd=pd.to_datetime(known_day_np)
|
793
|
+
known_day=known_day_pd.strftime("%Y-%m-%d")
|
794
|
+
known_close=X_new['Close'].values[0]
|
654
795
|
|
655
796
|
#建立样本:标签序列
|
656
797
|
#y1=df2['nextDirection'] #二分类
|
657
798
|
#y2=df2['nextChange%'] #回归
|
658
|
-
y3=
|
799
|
+
y3=ydf['Close_next'] #回归
|
659
800
|
|
660
801
|
#拆分训练集和测试集:y1
|
661
802
|
from sklearn.model_selection import train_test_split
|
@@ -672,10 +813,11 @@ def price_price_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neig
|
|
672
813
|
mlist4=['cityblock','euclidean','minkowski','cosine']
|
673
814
|
mlist=mlist1+mlist2+mlist3+mlist4
|
674
815
|
rslist=list(range(0,max_RS+1))
|
816
|
+
|
675
817
|
results=pd.DataFrame(columns=('spread','train_score','test_score', \
|
676
818
|
'neighbours','weight','metric','random','pred'))
|
677
819
|
print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
|
678
|
-
print(' Progress: 0
|
820
|
+
print(' Progress: 0% ',end='')
|
679
821
|
for n in nlist:
|
680
822
|
for w in wlist:
|
681
823
|
for m in mlist:
|
@@ -683,121 +825,204 @@ def price_price_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neig
|
|
683
825
|
try:
|
684
826
|
knn1=KNeighborsRegressor(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
|
685
827
|
X_train,X_test,y_train,y_test=train_test_split(X,y3,random_state=rs)
|
686
|
-
knn1.fit(X_train, y_train)
|
828
|
+
tmp=knn1.fit(X_train, y_train)
|
687
829
|
train_score=round(knn1.score(X_train, y_train),3)
|
688
830
|
test_score=round(knn1.score(X_test, y_test),3)
|
689
831
|
prediction=knn1.predict(X_new)[0]
|
690
832
|
except:
|
691
|
-
print(" #Bug: n=",n,"w=",w,"m=",m,"rs=",rs)
|
692
|
-
break
|
833
|
+
#print(" #Bug parameters: n=",n,"w=",w,"m=",m,"rs=",rs)
|
834
|
+
#break
|
835
|
+
continue
|
693
836
|
spread=abs(round(train_score-test_score,3))
|
694
837
|
|
695
838
|
row=pd.Series({'spread':spread,'train_score':train_score, \
|
696
839
|
'test_score':test_score,'neighbours':n, \
|
697
840
|
'weight':w,'metric':m,'random':rs,'pred':prediction})
|
698
841
|
results=results.append(row,ignore_index=True)
|
699
|
-
print(int(n/n_num*100),'\b
|
700
|
-
print('
|
842
|
+
print(int(n/n_num*100),'\b% ',end='')
|
843
|
+
print('.')
|
844
|
+
|
845
|
+
# 过滤训练/测试分数差的结果
|
846
|
+
"""
|
847
|
+
r0=results[(results['train_score']>=min_score) & (results['test_score']>=min_score)]
|
848
|
+
|
849
|
+
# 如果最低分数设置太高
|
850
|
+
r0_len=len(r0)
|
851
|
+
if r0_len==0:
|
852
|
+
train_score_max=results['train_score'].max()
|
853
|
+
test_score_max=results['test_score'].max()
|
854
|
+
min_score_new=min(train_score_max,test_score_max)
|
855
|
+
r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
|
856
|
+
|
857
|
+
if len(r0)==0:
|
858
|
+
spread_quantile=results['spread'].quantile(q=0.01)
|
859
|
+
r0_spread=results[(results['spread']<spread_quantile)]
|
860
|
+
if len(r0_spread)==0:
|
861
|
+
r0_spread=results[(results['spread']<=spread_quantile)]
|
862
|
+
|
863
|
+
test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
|
864
|
+
r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
|
865
|
+
if len(r0)==0:
|
866
|
+
r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
|
867
|
+
|
868
|
+
train_score_max=r0['train_score'].max()
|
869
|
+
test_score_max=r0['test_score'].max()
|
870
|
+
min_score_new=min(train_score_max,test_score_max)
|
871
|
+
|
872
|
+
print(" #Warning: filtering parameter min_score has been adjusted to",min_score_new)
|
873
|
+
"""
|
874
|
+
|
875
|
+
train_score_max=results['train_score'].max()
|
876
|
+
test_score_max=results['test_score'].max()
|
877
|
+
min_score_new=min(train_score_max,test_score_max)
|
878
|
+
r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
|
879
|
+
|
880
|
+
if len(r0)==0:
|
881
|
+
spread_quantile=results['spread'].quantile(q=0.01)
|
882
|
+
r0_spread=results[(results['spread']<spread_quantile)]
|
883
|
+
if len(r0_spread)==0:
|
884
|
+
r0_spread=results[(results['spread']<=spread_quantile)]
|
885
|
+
|
886
|
+
test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
|
887
|
+
r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
|
888
|
+
if len(r0)==0:
|
889
|
+
r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
|
890
|
+
|
891
|
+
train_score_max=r0['train_score'].max()
|
892
|
+
test_score_max=r0['test_score'].max()
|
893
|
+
min_score_new=min(train_score_max,test_score_max)
|
701
894
|
|
702
|
-
#去掉严重过拟合的结果
|
703
|
-
r0=results[results['train_score'] < 1]
|
704
|
-
#去掉训练集、测试集分数不过半的模型
|
705
|
-
r0=r0[r0['train_score'] > min_score]
|
706
|
-
r0=r0[r0['test_score'] > min_score]
|
707
895
|
#去掉泛化效果差的结果
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
896
|
+
"""
|
897
|
+
r1=r0[r0['spread'] <= diff] #限定泛化差距
|
898
|
+
r1_len=len(r1)
|
899
|
+
|
900
|
+
if r1_len==0:
|
901
|
+
diff_new=r0['spread'].min()
|
902
|
+
r1=r0[r0['spread'] <= diff_new]
|
903
|
+
print(" #Warning: filtering parameter diff has been adjusted to",diff_new)
|
904
|
+
"""
|
716
905
|
|
906
|
+
diff_new=r0['spread'].min()
|
907
|
+
r1=r0[r0['spread'] == diff_new]
|
717
908
|
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
909
|
+
#优先查看泛化效果最优的结果:spread最小
|
910
|
+
"""
|
911
|
+
votes=min(votes,len(r1))
|
912
|
+
r2=r1.sort_values(by=['spread','test_score'],ascending=[True,False]).head(votes)
|
913
|
+
#优先查看测试分数最高的结果:test_score最大
|
914
|
+
r3=r2.sort_values(by=['test_score','spread'],ascending=[False,True])
|
915
|
+
"""
|
916
|
+
|
917
|
+
r3head=r1
|
918
|
+
#平均股价
|
919
|
+
decision=round(r3head['pred'].mean(),2)
|
920
|
+
decision_score=round(r3head['test_score'].mean(),3)
|
724
921
|
|
922
|
+
"""
|
923
|
+
r3head['w_pred']=r3head['pred'] * r3head['test_score']
|
924
|
+
w_pred_sum=r3head['w_pred'].sum()
|
925
|
+
test_score_sum=r3head['test_score'].sum()
|
926
|
+
decision=round(w_pred_sum / test_score_sum,2)
|
927
|
+
decision_score=round(r3head['test_score'].mean(),2)
|
928
|
+
"""
|
725
929
|
"""
|
726
930
|
#股价中位数:偶尔出现奇怪的错误,未找到原因
|
727
|
-
decision0=
|
728
|
-
pos=list(
|
729
|
-
decision_score0=list(
|
931
|
+
decision0=r3head['pred'].median()
|
932
|
+
pos=list(r3head['pred']).index(decision0)
|
933
|
+
decision_score0=list(r3head['test_score'])[pos]
|
730
934
|
decision=round(decision0,2)
|
731
935
|
decision_score=round(decision_score0,2)
|
732
936
|
"""
|
733
937
|
import numpy as np
|
734
|
-
if
|
938
|
+
if np.isnan(decision): decision='?'
|
735
939
|
|
736
940
|
if not printout: return decision,decision_score,today_close,today
|
737
941
|
|
738
942
|
print(" Model poll for stock price after "+str(ndays)+" trading days:",decision)
|
739
|
-
print("Last close price: "+ticker+', '+str(
|
740
|
-
|
741
|
-
|
943
|
+
print(" Last close price: "+codetranslate(ticker)+', closed '+str(known_close)+', '+str(known_day))
|
944
|
+
|
945
|
+
ndate=date_adjust(known_day,adjust=ndays)
|
946
|
+
|
947
|
+
print(" Prediction for stock close price on",ndate,"\b:",decision)
|
948
|
+
|
949
|
+
return decision,decision_score,known_close,known_day
|
742
950
|
|
743
|
-
if __name__=='__main__':
|
744
|
-
df=get_money_flowin(ticker)
|
745
|
-
df=price_price_knn('600519',df,ndays=1,max_neighbours=3,max_RS=2)
|
746
951
|
|
747
952
|
#==============================================================================
|
748
953
|
if __name__=='__main__':
    # Manual smoke test for forecast_price_knn using a Shanghai-listed code.
    ticker='600519.SS'
    # Scratch values kept for interactive experiments; the calls below
    # pass their own ndays and rely on the function defaults otherwise.
    ndays,max_neighbours,max_RS=1,3,2

    # One-day-ahead forecast, then a three-day-ahead forecast, both anchored
    # at the same end date.
    forecast=forecast_price_knn(ticker,end='2023-6-15',ndays=1)
    forecast=forecast_price_knn(ticker,end='2023-6-15',ndays=3)
|
756
961
|
|
757
|
-
def forecast_price_knn(ticker,ndays=1,diff=0.03,min_score=0.
|
758
|
-
|
962
|
+
#def forecast_price_knn(ticker,ndays=1,diff=0.03,min_score=0.7,votes=100,max_neighbours=10,max_RS=10,end='latest'):
|
963
|
+
def forecast_price_knn(ticker,ndays=1,max_neighbours=10,max_RS=20,end='latest'):
    """
    Forecast a stock's close price for each of the next ndays trading days,
    using the money-flow data returned by get_money_flowin and the KNN
    model poll implemented in price_price_knn.

    ticker: stock code handed to get_money_flowin, e.g. '600519.SS'
            (that helper reads a '.SS'/'.SZ' suffix and infers the market
            from the first digit when no suffix is given).
    ndays: number of trading days ahead to forecast, default 1. One model
           poll is run for every horizon from 1 to ndays.
    max_neighbours: maximum number of neighbours, forwarded to price_price_knn.
    max_RS: maximum random seed, forwarded to price_price_knn; larger is
            better according to the original notes.
    end: forwarded to get_money_flowin; 'latest' or a date such as '2023-6-15'.

    The former diff, min_score and votes options are deprecated and are no
    longer part of the signature.

    Returns (decisionlist, confidencelist): the predicted price and the
    confidence score for each horizon, in order. A prediction may be the
    string '?' when the poll could not produce a number. Both lists are
    empty when ndays < 1 or when no money-flow data could be fetched.

    Note: the results are often quite unreliable!
    """
    # Guard first: with no horizon to forecast there is nothing to report,
    # and the summary below would otherwise reference unbound names.
    if ndays < 1:
        print("  #Warning(forecast_price_knn): ndays must be at least 1, got",ndays)
        return [],[]

    print("\nStart forecasting, it may take great time, please wait ...")

    # Fetch the stock's net money inflow together with the market index data
    df=get_money_flowin(ticker,end=end)
    if df is None:
        # get_money_flowin returns None for exchanges it does not support
        print("  #Warning(forecast_price_knn): no money flow data available for",ticker)
        return [],[]

    # Run one model poll per forecasting horizon
    decisionlist=[]
    confidencelist=[]
    for nd in range(1,ndays+1):
        decision,confidence,known_close,known_day=price_price_knn(ticker,df,ndays=nd,
                                                                  max_neighbours=max_neighbours,
                                                                  max_RS=max_RS)
        decisionlist.append(decision)
        confidencelist.append(confidence)

    print("\nStock information:",codetranslate(ticker),'\b, closed',known_close,'@',known_day)

    for nd,(pred,conf) in enumerate(zip(decisionlist,confidencelist),start=1):
        ndate=date_adjust(known_day, adjust=nd)

        # A zero prediction or zero confidence is reported as a closed market
        if pred == 0 or conf == 0:
            print("Forecasting stock prices on",ndate,'\b: market closed')
            continue

        # '?' marks a poll that produced no usable number
        if pred == '?':
            print("Forecasting stock prices on",ndate,'\b: uncertain')
            continue

        # Compare THIS day's prediction (not the last one computed in the
        # loop above) with the last known close price.
        change=pred - known_close
        if abs(change) < 0.05:
            comment="MINOR difference with previous one"
        elif change > 0:
            comment="HIGHER than previous one"
        else:
            comment="LOWER than previous one"

        print("Forecasting stock prices on",ndate,'\b:',pred,'('+str(round(conf*100,1))+'% confident)')
        print("Forecasting stock prices trend on",ndate,'\b:',comment)

    return decisionlist,confidencelist
|
798
1025
|
|
799
|
-
if __name__=='__main__':
|
800
|
-
df=forecast_price_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
|
801
1026
|
|
802
1027
|
#==============================================================================
|
803
1028
|
#==============================================================================
|