siat 2.0.19__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
siat/stock_china.py CHANGED
@@ -20,54 +20,88 @@ from siat.security_prices import *
20
20
  #==============================================================================
21
21
 
22
22
  if __name__=='__main__':
23
- ticker='600519'
23
+ ticker='600519.Ss'
24
24
 
25
- def get_money_flowin(ticker):
25
+ dfp=get_money_flowin(ticker)
26
+
27
+ end='2023-6-15'
28
+ dfp=get_money_flowin(ticker,end)
29
+
30
+ def get_money_flowin(ticker,end='latest'):
26
31
  """
27
32
  功能:抓取个股近一百个交易日的资金净流入情况,以及大盘指数的情况
28
- ticker:个股代码,不带后缀
33
+ ticker:个股代码,带后缀
29
34
  标准化方法:原始数据
35
+ 注意:目前仅支持沪深股市
30
36
  """
37
+ ticker1=ticker.upper()[:6]
38
+ exch=ticker.upper()[7:9]
39
+
31
40
  import akshare as ak
32
41
  import pandas as pd
33
42
 
34
43
  #判断沪深市场
35
- l1=ticker[0]; market='sh'
36
- if l1 in ['0','2','3']: market='sz'
37
- #深市股票以0/2/3开头,沪市以6/9开头
38
-
44
+ if exch=='':
45
+ l1=ticker[0]; market='sh'
46
+ if l1 in ['0','2','3']: market='sz'
47
+ #深市股票以0/2/3开头,沪市以6/9开头
48
+ else:
49
+ if exch=='SS':
50
+ market='sh'
51
+ elif exch=='SZ':
52
+ market='sz'
53
+ else:
54
+ print(" #Warning(get_money_flowin): currently only support Shanghai/Shenzhen stock exchanges")
55
+ return None
56
+
39
57
  #获得个股资金流动明细
40
58
  try:
41
- df = ak.stock_individual_fund_flow(stock=ticker, market=market)
59
+ df = ak.stock_individual_fund_flow(stock=ticker1, market=market)
42
60
  except:
43
- print("#Error(predict_price_direction): stock code not found for",ticker)
44
- return
61
+ print(" #Error(get_money_flowin): money flow info unavailable for",ticker)
62
+ return None
45
63
 
46
64
  df['ticker']=ticker
65
+
66
+ df['Date']=df['日期'].apply(lambda x: pd.to_datetime(x)) #不带时区的日期
67
+ df.set_index('Date',inplace=True)
47
68
  df['date']=df['日期']
69
+
70
+ df['Close']=df['收盘价'].apply(lambda x: float(x))
71
+ df['Change%']=df['涨跌幅'].apply(lambda x: float(x))
72
+
48
73
  #类型转换
49
74
  df['netFlowInAmount_main']=df['主力净流入-净额'].apply(lambda x: float(x))
50
75
  df['netFlowInAmount_small']=df['小单净流入-净额'].apply(lambda x: float(x))
51
76
  df['netFlowInAmount_mid']=df['中单净流入-净额'].apply(lambda x: float(x))
52
77
  df['netFlowInAmount_big']=df['大单净流入-净额'].apply(lambda x: float(x))
53
78
  df['netFlowInAmount_super']=df['超大单净流入-净额'].apply(lambda x: float(x))
79
+
80
+ # 总净流入金额:可正可负
54
81
  df['netFlowInAmount']=df['netFlowInAmount_main']+df['netFlowInAmount_small']+ \
55
- df['netFlowInAmount_mid']+df['netFlowInAmount_big']+df['netFlowInAmount_super']
82
+ df['netFlowInAmount_mid']+df['netFlowInAmount_big']+ \
83
+ df['netFlowInAmount_super']
56
84
 
57
85
  df['netFlowInRatio%_main']=df['主力净流入-净占比'].apply(lambda x: float(x))
58
86
  df['netFlowInRatio%_small']=df['小单净流入-净占比'].apply(lambda x: float(x))
59
87
  df['netFlowInRatio%_mid']=df['中单净流入-净占比'].apply(lambda x: float(x))
60
88
  df['netFlowInRatio%_big']=df['大单净流入-净占比'].apply(lambda x: float(x))
61
89
  df['netFlowInRatio%_super']=df['超大单净流入-净占比'].apply(lambda x: float(x))
62
-
63
- #重要:删除有缺失值的记录,确保未收盘时能预测当天的收盘价涨跌方向
64
- df.dropna(inplace=True)
65
-
66
- df['Close']=df['收盘价'].apply(lambda x: float(x))
67
- df['Change%']=df['涨跌幅'].apply(lambda x: float(x))
68
90
 
69
- df['Date']=df['日期'].apply(lambda x: pd.to_datetime(x)) #不带时区的日期
70
- df.set_index('Date',inplace=True)
91
+ #重要:处理有缺失值的记录,确保未收盘时能预测当天的收盘价涨跌方向
92
+ #df.dropna(inplace=True)
93
+ df.fillna(0,inplace=True)
94
+
95
+ # 对数量级巨大的金额项进行对数处理,避免其对数量级小的项形成数量级压制
96
+ amtColList=['netFlowInAmount_main','netFlowInAmount_small','netFlowInAmount_mid', \
97
+ 'netFlowInAmount_big','netFlowInAmount_super','netFlowInAmount']
98
+ import math
99
+ for a in amtColList:
100
+ # 取对数
101
+ df[a+'_ln']=df[a].apply(lambda x: math.log(x) if x>0 else -math.log(-x) if x<0 else 0)
102
+
103
+ # 删除原有金额项目
104
+ df.drop(amtColList,axis=1,inplace=True)
71
105
 
72
106
  #去掉不用的字段
73
107
  dfdroplist=['主力净流入-净额','小单净流入-净额','中单净流入-净额','大单净流入-净额', \
@@ -76,36 +110,47 @@ def get_money_flowin(ticker):
76
110
  '收盘价','涨跌幅','日期']
77
111
  df.drop(labels=dfdroplist,axis=1,inplace=True)
78
112
 
79
- #获得大盘指数
80
- dpindex="sh000001" #上证综合指数
81
- if market == 'sz': dpindex="sz399001" #深圳成分指数
82
- dp=ak.stock_zh_index_daily(symbol=dpindex)
83
- dp['Date']=dp.index
84
- dp['Date']=dp['Date'].apply(lambda x: x.replace(tzinfo=None)) #去掉时区信息
85
- dp.set_index('Date',inplace=True)
86
-
87
- #去掉不用的字段
88
- dpdroplist=['open','high','low']
89
- dp.drop(labels=dpdroplist,axis=1,inplace=True)
90
- dp.rename(columns={'close':'dpClose','volume':'dpVolume'}, inplace = True)
91
-
92
- #合并大盘指数:索引日期均不带时区,否则出错
93
- dfp=pd.merge(df,dp,how='left',left_index=True,right_index=True)
94
-
95
- """
96
- #取得标签/特征向量
97
- ydf=dfp[['Close','Change%']]
98
- X=dfp.drop(labels=['date','Close','Change%'],axis=1)
99
-
100
- scaler_X=preproc(X,preproctype=preproctype)
101
- scaler_dfp=pd.merge(scaler_X,ydf,how='left',left_index=True,right_index=True)
102
- return scaler_dfp
103
- """
113
+ df.sort_index(ascending=True,inplace=True)
114
+ fromdate=df['date'].values[0]
115
+ todate=df['date'].values[-1]
116
+
117
+ #获得大盘指数和交易量
118
+ dpindex="000001.SS" #上证综合指数
119
+ if market == 'sz': dpindex="399001.SZ" #深圳成分指数
120
+ df_dp_tmp=get_price(dpindex,fromdate,todate)
121
+
122
+ df_dp_tmp['Volume_mkt']=df_dp_tmp['Volume'].apply(lambda x: math.log(x))
123
+ df_dp_tmp['Close_mkt']=df_dp_tmp['Close']
124
+ df_dp=df_dp_tmp[['Volume_mkt','Close_mkt']]
125
+
126
+ # 获得股票交易量
127
+ df_stk_tmp=get_price(ticker,fromdate,todate)
128
+ df_stk_tmp['Volume_ln']=df_stk_tmp['Volume'].apply(lambda x: math.log(x))
129
+ df_stk=df_stk_tmp[['Open','High','Low','Adj Close','Volume_ln']]
130
+
131
+ # 合并
132
+ df_stk_mkt=pd.merge(df_stk,df_dp,how='left',left_index=True,right_index=True)
133
+
134
+ dfp_tmp=pd.merge(df,df_stk_mkt,how='left',left_index=True,right_index=True)
135
+ newColList=['date','Close','Change%','Adj Close','Open','High','Low','Volume_ln', \
136
+ 'netFlowInRatio%_main','netFlowInRatio%_small','netFlowInRatio%_mid', \
137
+ 'netFlowInRatio%_big','netFlowInRatio%_super', \
138
+ 'netFlowInAmount_main_ln','netFlowInAmount_small_ln','netFlowInAmount_mid_ln', \
139
+ 'netFlowInAmount_big_ln','netFlowInAmount_super_ln', 'netFlowInAmount_ln', \
140
+ 'Close_mkt','Volume_mkt']
141
+ dfp=dfp_tmp[newColList]
142
+
143
+ if end=='latest':
144
+ dfp2=dfp
145
+ else:
146
+ jieguo,end2=check_date2(end)
147
+ if not jieguo:
148
+ dfp2=dfp
149
+ else:
150
+ dfp2=dfp[dfp['date'] <= end2]
104
151
 
105
- return dfp
152
+ return dfp2
106
153
 
107
- if __name__=='__main__':
108
- dfp=get_money_flowin('600519')
109
154
 
110
155
  #==============================================================================
111
156
  # 对特征数据进行预处理
@@ -169,9 +214,13 @@ if __name__=='__main__':
169
214
  #==============================================================================
170
215
  if __name__=='__main__':
171
216
  ndays=1
172
- preCumTimes=1
217
+ preCumTimes=5
218
+
219
+ dfp=get_money_flowin('600519.SS')
220
+
221
+ X,ydf,X_new=make_sample(dfp,ndays=1)
173
222
 
174
- def make_sample(dfp,ndays=1,preCumTimes=1):
223
+ def make_sample(dfp,ndays=1,preCumTimes=5):
175
224
  """
176
225
  功能:构造适合机器学习的样本
177
226
  ndays:预测未来几个交易日
@@ -183,55 +232,68 @@ def make_sample(dfp,ndays=1,preCumTimes=1):
183
232
  preDays=ndays * preCumTimes
184
233
 
185
234
  #构造过去一段时间资金净流入累加值
186
- dfp['netFlowInAmtCum_main']=dfp['netFlowInAmount_main'].rolling(window=preDays,min_periods=1).sum()
187
- dfp['netFlowInAmtCum_small']=dfp['netFlowInAmount_small'].rolling(window=preDays,min_periods=1).sum()
188
- dfp['netFlowInAmtCum_mid']=dfp['netFlowInAmount_mid'].rolling(window=preDays,min_periods=1).sum()
189
- dfp['netFlowInAmtCum_big']=dfp['netFlowInAmount_big'].rolling(window=preDays,min_periods=1).sum()
190
- dfp['netFlowInAmtCum_super']=dfp['netFlowInAmount_super'].rolling(window=preDays,min_periods=1).sum()
191
- dfp['netFlowInAmtCum']=dfp['netFlowInAmount'].rolling(window=preDays,min_periods=1).sum()
235
+ amtColList=[]
236
+ colList=list(dfp)
237
+ for c in colList:
238
+ if 'Amount' in c:
239
+ amtColList=amtColList+[c]
240
+
241
+ for c in amtColList:
242
+ dfp[c+'_cum']=dfp[c].rolling(window=preDays,min_periods=1).sum()
192
243
 
193
244
  #构造过去一段时间资金净流入比例均值
194
- dfp['netFlowInRatioAvg%_main']=dfp['netFlowInRatio%_main'].rolling(window=preDays,min_periods=1).mean()
195
- dfp['netFlowInRatioAvg%_small']=dfp['netFlowInRatio%_small'].rolling(window=preDays,min_periods=1).mean()
196
- dfp['netFlowInRatioAvg%_mid']=dfp['netFlowInRatio%_mid'].rolling(window=preDays,min_periods=1).mean()
197
- dfp['netFlowInRatioAvg%_big']=dfp['netFlowInRatio%_big'].rolling(window=preDays,min_periods=1).mean()
198
- dfp['netFlowInRatioAvg%_super']=dfp['netFlowInRatio%_super'].rolling(window=preDays,min_periods=1).mean()
199
-
245
+ ratioColList=[]
246
+ for c in colList:
247
+ if 'Ratio' in c:
248
+ ratioColList=ratioColList+[c]
249
+
250
+ for c in ratioColList:
251
+ dfp[c+'_avg']=dfp[c].rolling(window=preDays,min_periods=1).mean()
252
+
200
253
  #构造过去一段时间大盘指数的均值和标准差
201
- dfp['dpCloseAvg']=dfp['dpClose'].rolling(window=preDays,min_periods=1).mean()
202
- #dfp['dpCloseStd']=dfp['dpClose'].rolling(window=preDays,min_periods=1).std()
203
- dfp['dpVolumeAvg']=dfp['dpVolume'].rolling(window=preDays,min_periods=1).mean()
204
- #dfp['dpVolumeStd']=dfp['dpVolume'].rolling(window=preDays,min_periods=1).std()
254
+ mktColList=['Close_mkt','Volume_mkt']
255
+ for c in mktColList:
256
+ dfp[c+'_avg']=dfp[c].rolling(window=preDays,min_periods=1).mean()
257
+ dfp[c+'_std']=dfp[c].rolling(window=preDays,min_periods=1).std()
205
258
 
206
259
  #重要:去掉前几行,此处位置敏感
207
- dfp.dropna(inplace=True)
208
-
209
- #添加未来更多天的股价信息
210
- ylist=[]
211
- for nd in list(range(1,ndays+1)):
212
- dfp['Close_next'+str(nd)]=dfp['Close'].shift(-nd)
213
- ylist=ylist+['Close_next'+str(nd)]
214
- dfp['Change%_next'+str(nd)]=dfp['Change%'].shift(-nd)
215
- ylist=ylist+['Change%_next'+str(nd)]
216
-
217
- X = dfp[[
218
- 'netFlowInAmount_main','netFlowInAmount_small','netFlowInAmount_mid', \
219
- 'netFlowInAmount_big','netFlowInAmount_super','netFlowInAmount', \
220
-
221
- 'netFlowInAmtCum_main','netFlowInAmtCum_small','netFlowInAmtCum_mid', \
222
- 'netFlowInAmtCum_big','netFlowInAmtCum_super','netFlowInAmtCum', \
223
-
224
- 'netFlowInRatio%_main','netFlowInRatio%_small','netFlowInRatio%_mid', \
225
- 'netFlowInRatio%_big','netFlowInRatio%_super',
226
-
227
- 'netFlowInRatioAvg%_main','netFlowInRatioAvg%_small','netFlowInRatioAvg%_mid', \
228
- 'netFlowInRatioAvg%_big','netFlowInRatioAvg%_super',
229
-
230
- 'dpClose','dpCloseAvg','dpVolume','dpVolumeAvg']]
231
-
232
- ydf = dfp[ylist]
233
-
234
- return X,ydf
260
+ dfp1=dfp.dropna()
261
+ if len(dfp1)==0:
262
+ print(" #Error(make_sample): dropna caused empty dataframe, process stopped")
263
+ return None,None
264
+
265
+ #按照ndays滚动
266
+ newColList=list(dfp1)
267
+ newColList.remove('date')
268
+ dfp2=dfp1[newColList]
269
+ for c in newColList:
270
+ dfp2[c+'_roll']=dfp2[c].rolling(window=ndays,min_periods=1).mean()
271
+
272
+ newColList2=list(dfp2)
273
+ newColList3=[]
274
+ for c in newColList2:
275
+ if '_roll' in c:
276
+ newColList3=newColList3+[c]
277
+ newColList3=newColList3+['Close','Change%']
278
+ dfp3=dfp2[newColList3]
279
+
280
+ #添加未来一个单位(ndays)的股价信息
281
+ dfp3['Close_next']=dfp3['Close'].shift(-1)
282
+ dfp3['Change%_next']=dfp3['Change%'].shift(-1)
283
+ ylist=['Close_next','Change%_next']
284
+
285
+ #供训练用的数据集
286
+ XCollist=list(dfp3)
287
+ for c in ylist:
288
+ XCollist.remove(c)
289
+ dfp4=dfp3.dropna()
290
+ X = dfp4[XCollist]
291
+ ydf = dfp4[ylist]
292
+
293
+ # X_new
294
+ X_new = dfp3.tail(1)[XCollist]
295
+
296
+ return X,ydf,X_new
235
297
 
236
298
  #==============================================================================
237
299
  # 训练模型,获得最优模型参数,进行预测
@@ -254,6 +316,8 @@ def train_predict_knn(X,ydf,noday=1,y='Close', \
254
316
  ydf:标签矩阵
255
317
  nodays:预测未来第几天
256
318
  y:标签,默认'Close'为股价,'Change%'为涨跌幅,'Direction'为涨跌方向
319
+
320
+ 未用!
257
321
  """
258
322
  ylist=['Close','Change%','Direction']
259
323
  if not (y in ylist):
@@ -332,6 +396,8 @@ def train_predict_knn(X,ydf,noday=1,y='Close', \
332
396
  if votes > len(r2): votes=len(r2)
333
397
  r2head=r2.head(votes)
334
398
 
399
+ # 未完
400
+
335
401
  #==============================================================================
336
402
  # 训练,获得最优模型参数
337
403
  #==============================================================================
@@ -347,6 +413,8 @@ def training_knn_clf(scaler_X,ydf,ndays=1,max_neighbors=10,max_p=6,cv=5,rs=0):
347
413
  功能:对(X,y)
348
414
  scaler_X: 特征矩阵
349
415
  y:标签矩阵
416
+
417
+ 未用!
350
418
  '''
351
419
 
352
420
  #获得分类变量y
@@ -397,71 +465,61 @@ def training_knn_clf(scaler_X,ydf,ndays=1,max_neighbors=10,max_p=6,cv=5,rs=0):
397
465
  # Forecasting stock price directions by money flow in/out, using knn
398
466
  #==============================================================================
399
467
  if __name__=='__main__':
400
- ticker='600519'
468
+ ticker='600519.SS'
469
+ ticker='000723.SZ'
470
+
401
471
  ndays=1
402
- market='sh'
403
- diff=0.03
472
+ min_score=0.9
473
+ diff=0.01
404
474
  votes=100
405
475
  max_neighbours=10
406
476
  max_RS=10
477
+ printout=True
478
+
479
+ df=get_money_flowin(ticker,end='2023-6-15')
480
+ decision,known_close,known_day=price_direction_knn(ticker,df,ndays=1)
407
481
 
408
- def price_direction_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neighbours=10,max_RS=10,printout=True):
482
+ def price_direction_knn(ticker,df,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=10,printout=True):
409
483
 
410
484
  """
411
485
  功能:基于个股资金流动预测次日股票涨跌方向,涨或跌
412
486
  ticker:股票代码,无后缀
413
487
  df:个股资金净流入
414
- dp:大盘信息
415
488
  ndays:预测几天后的股价涨跌方向,默认1天
416
- market:sh-沪市,sz-深市
417
- diff:泛化精度,越小越好,默认0.03
418
- votes:软表决票数,默认100
489
+ diff:泛化精度,越小越好
490
+ min_score:最小测试分数
491
+ votes:软表决票数
419
492
  max_neighbours:最大邻居个数
420
493
  max_RS:最大随机数种子
421
494
  """
422
495
  import pandas as pd
423
496
 
424
- #构造标签
425
- df['nextClose']=df['Close'].shift(-ndays)
426
- df['nextChange%']=df['Change%'].shift(-ndays)
427
- df['nextDirection']=df['nextChange%'].apply(lambda x: 1 if float(x) > 0 else -1)
428
-
429
- #构造特征
430
- df['netFlowInChg_main']=df['netFlowInAmount_main'] - df['netFlowInAmount_main'].shift(-ndays)
431
- df['netFlowInChg_small']=df['netFlowInAmount_small'] - df['netFlowInAmount_small'].shift(-ndays)
432
- df['netFlowInChg_mid']=df['netFlowInAmount_mid'] - df['netFlowInAmount_mid'].shift(-ndays)
433
- df['netFlowInChg_big']=df['netFlowInAmount_big'] - df['netFlowInAmount_big'].shift(-ndays)
434
- df['netFlowInChg_super']=df['netFlowInAmount_super'] - df['netFlowInAmount_super'].shift(-ndays)
435
- df['netFlowInChg']=df['netFlowInAmount'] - df['netFlowInAmount'].shift(-ndays)
436
-
437
- df['netFlowInRatio%Chg_main']=df['netFlowInRatio%_main'] - df['netFlowInRatio%_main'].shift(-ndays)
438
- df['netFlowInRatio%Chg_small']=df['netFlowInRatio%_small'] - df['netFlowInRatio%_small'].shift(-ndays)
439
- df['netFlowInRatio%Chg_mid']=df['netFlowInRatio%_mid'] - df['netFlowInRatio%_mid'].shift(-ndays)
440
- df['netFlowInRatio%Chg_big']=df['netFlowInRatio%_big'] - df['netFlowInRatio%_big'].shift(-ndays)
441
- df['netFlowInRatio%Chg_super']=df['netFlowInRatio%_super'] - df['netFlowInRatio%_super'].shift(-ndays)
442
-
443
- df['dpCloseChg']=df['dpClose'] - df['dpClose'].shift(-ndays)
444
- df['dpVolumeChg']=df['dpVolume'] - df['dpVolume'].shift(-ndays)
445
-
446
- df2=df[['date','netFlowInChg_main',
447
- 'netFlowInChg_small','netFlowInChg_mid','netFlowInChg_big', \
448
- 'netFlowInChg_super','netFlowInChg','netFlowInRatio%Chg_main','netFlowInRatio%Chg_small', \
449
- 'netFlowInRatio%Chg_mid','netFlowInRatio%Chg_big','netFlowInRatio%Chg_super', \
450
- 'Close','Change%','dpCloseChg','dpVolumeChg','nextClose','nextChange%','nextDirection']]
451
-
452
- #记录最新指标,用于预测次日涨跌
453
- x_last=df2.copy().tail(1)
454
- today=x_last['date'].values[0]
455
- today_close=x_last['Close'].values[0]
456
- x_last.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1,inplace=True)
457
- X_new = x_last.head(1).values
458
-
459
- #建立样本:特征序列
460
- df2.dropna(inplace=True)
461
- X=df2.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1)
462
-
497
+ # 判断是否周末无交易免预测
498
+ known_day=df.tail(1)['date'].values[0]
499
+
500
+ tdate=date_adjust(known_day, adjust=ndays)
501
+ tdate_pd=pd.to_datetime(tdate)
502
+ tdate_wd=tdate_pd.weekday()
503
+ if tdate_wd in [5,6]:
504
+ print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
505
+ decision='~'
506
+ known_close=df.tail(1)['Close'].values[0]
507
+
508
+ return decision,known_close,known_day
509
+
510
+ #构造样本:标签ydf,特征矩阵X,预测X_new
511
+ X,ydf,X_new=make_sample(df,ndays=ndays)
512
+
513
+ #X_new1=X_new.head(1).values
514
+ known_day_np=X_new.index.values[0]
515
+ known_day_pd=pd.to_datetime(known_day_np)
516
+ known_day=known_day_pd.strftime("%Y-%m-%d")
517
+ known_close=X_new['Close'].values[0]
518
+
519
+ ydf['nextDirection']=ydf['Change%_next'].apply(lambda x: 1 if x>0 else -1 if x <0 else 0)
520
+
463
521
  #建立样本:标签序列
464
- y1=df2['nextDirection'] #二分类
522
+ y1=ydf['nextDirection'] #二分类
465
523
  #y2=df2['nextChange%'] #回归
466
524
  #y3=df2['nextClose'] #回归
467
525
 
@@ -480,17 +538,18 @@ def price_direction_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
480
538
  mlist4=['cityblock','euclidean','minkowski','cosine']
481
539
  mlist=mlist1+mlist2+mlist3+mlist4
482
540
  rslist=list(range(0,max_RS+1))
541
+
483
542
  results=pd.DataFrame(columns=('spread','train_score','test_score', \
484
543
  'neighbours','weight','metric','random','pred'))
485
544
  print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
486
- print(' Progress: 0%, ',end='')
545
+ print(' Progress: 0% ',end='')
487
546
  for n in nlist:
488
547
  for w in wlist:
489
548
  for m in mlist:
490
549
  for rs in rslist:
491
550
  knn1=KNeighborsClassifier(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
492
551
  X_train,X_test,y_train,y_test=train_test_split(X,y1,random_state=rs)
493
- knn1.fit(X_train, y_train)
552
+ tmp=knn1.fit(X_train, y_train) #调试时去掉tmp=可跟踪模型参数
494
553
  train_score=round(knn1.score(X_train, y_train),3)
495
554
  test_score=round(knn1.score(X_test, y_test),3)
496
555
  prediction=knn1.predict(X_new)[0]
@@ -500,65 +559,145 @@ def price_direction_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
500
559
  'test_score':test_score,'neighbours':n, \
501
560
  'weight':w,'metric':m,'random':rs,'pred':prediction})
502
561
  results=results.append(row,ignore_index=True)
503
- print(int(n/n_num*100),'\b%, ',end='')
562
+ print(int(n/n_num*100),'\b% ',end='')
504
563
  print('done.')
505
564
 
506
- #去掉严重过拟合的结果
507
- r0=results[results['train_score'] < 1]
508
- #去掉训练集、测试集分数不过半的模型
509
- r0=r0[r0['train_score'] > min_score]
510
- r0=r0[r0['test_score'] > min_score]
565
+
566
+ # 过滤训练/测试分数差的结果
567
+ r0=results[(results['train_score']>=min_score) & (results['test_score']>=min_score)]
568
+ # 避免min_score设置得太高导致无结果,或者数量太多
569
+ r0_len=len(r0)
570
+ #if r0_len==0 or r0_len > votes:
571
+ if r0_len==0:
572
+ train_score_max=results['train_score'].max()
573
+ test_score_max=results['test_score'].max()
574
+ min_score_new=min(train_score_max,test_score_max)
575
+ r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
576
+
577
+ if len(r0)==0:
578
+ spread_quantile=results['spread'].quantile(q=0.01)
579
+ r0_spread=results[(results['spread']<spread_quantile)]
580
+ if len(r0_spread)==0:
581
+ r0_spread=results[(results['spread']<=spread_quantile)]
582
+
583
+ test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
584
+ r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
585
+ if len(r0)==0:
586
+ r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
587
+
588
+ train_score_max=r0['train_score'].max()
589
+ test_score_max=r0['test_score'].max()
590
+ min_score_new=min(train_score_max,test_score_max)
591
+
592
+ print(" #Warning: filtering parameter min_score has been adjusted to",min_score_new)
593
+
511
594
  #去掉泛化效果差的结果
512
- r0=r0[r0['spread'] < diff] #限定泛化差距
513
- #优先查看泛化效果最优的结果
514
- r1=r0.sort_values(by=['spread','test_score'],ascending=[True,False])
515
- #优先查看测试分数最高的结果
516
- r2=r0.sort_values(by=['test_score','spread'],ascending=[False,True])
517
-
518
- if votes > len(r2): votes=len(r2)
519
- r2head=r2.head(votes)
595
+ r1=r0[r0['spread'] <= diff] #限定泛化差距
596
+ r1_len=len(r1)
597
+ #if r1_len==0 or r1_len>=votes:
598
+ if r1_len==0:
599
+ diff_new=r0['spread'].min()
600
+ r1=r0[r0['spread'] <= diff_new]
601
+ print(" #Warning: filtering parameter diff has been adjusted to",diff_new)
520
602
 
521
- zhang=len(r2head[r2head['pred']==1])
522
- die=len(r2head[r2head['pred']==-1])
523
603
 
524
- decision='+'
525
- if zhang >= die * 2.0: decision='2+'
526
- if zhang >= die * 3.0: decision='3+'
604
+ """
605
+ # 首选测试效果好的
606
+ r0=results[(results['test_score']>=min_score)]
607
+ if len(r0)==0:
608
+ test_quantile=results['test_score'].quantile(q=0.99,interpolation='nearest')
609
+ r0=results[(results['test_score']>test_quantile)]
610
+ if len(r0)==0:
611
+ r0=results[(results['test_score']>=test_quantile)]
612
+
613
+ print(" #Warning: filtering parameter min_score for testing has been adjusted to",test_quantile)
614
+ """
615
+
616
+ """
617
+ # 首选训练效果好的
618
+ r0=results[(results['train_score']>=min_score) & (results['train_score']<1.0)]
619
+ if len(r0)==0:
620
+ r0_train=results[(results['train_score']>=results['test_score']) & results['train_score']<1.0]
621
+ train_quantile=r0_train['train_score'].quantile(q=0.99,interpolation='nearest')
622
+ r0=r0_train[(r0_train['train_score']>train_quantile)]
623
+ if len(r0)==0:
624
+ r0=r0_train[(r0_train['train_score']>=train_quantile)]
625
+
626
+ print(" #Warning: filtering parameter min_score for training has been adjusted to",train_quantile)
627
+
628
+ # 再选泛化效果好的
629
+ r1=r0[(r0['spread']<=diff)]
630
+ if len(r1)==0:
631
+ spread_quantile=r0['spread'].quantile(q=0.05,interpolation='nearest')
632
+ r1=r0[(r0['spread']<spread_quantile)]
633
+ if len(r1)==0:
634
+ r1=r0[(r0['spread']<=spread_quantile)]
635
+
636
+ print(" #Warning: filtering parameter diff has been adjusted to",spread_quantile)
637
+ """
527
638
 
528
- if die > zhang: decision='-'
529
- if die >= zhang * 2.0: decision='2-'
530
- if die >= zhang * 3.0: decision='3-'
639
+ #优先查看泛化效果最优的结果:spread最小
640
+ votes=min(votes,len(r1))
641
+ r2=r1.sort_values(by=['spread','test_score'],ascending=[True,False]).head(votes)
642
+ #优先查看测试分数最高的结果:test_score最大
643
+ r3=r2.sort_values(by=['test_score','spread'],ascending=[False,True])
644
+
645
+ r3head=r3
531
646
 
532
- if abs(zhang-die)/((zhang+die)/2) < 0.05: decision='?'
647
+ zhang=len(r3head[r3head['pred']==1])
648
+ die=len(r3head[r3head['pred']==-1])
649
+
650
+ decision='+'
651
+ decision_text='HIGHER'
652
+ if die != 0:
653
+ if zhang >= die * 2.0: decision='2+'
654
+ if zhang >= die * 3.0: decision='3+'
655
+
656
+ if die > zhang:
657
+ decision='-'
658
+ decision_text='LOWER'
659
+ if zhang != 0:
660
+ if die >= zhang * 2.0: decision='2-'
661
+ if die >= zhang * 3.0: decision='3-'
662
+
663
+ #if abs(zhang-die)/((zhang+die)/2) < 0.05: decision='?'
664
+ if zhang==die:
665
+ decision='='
666
+ decision_text='FLAT'
533
667
 
534
668
  if not printout: return decision,today_close,today
535
669
 
536
- print(" Model poll for stock price after "+str(ndays)+" trading days: Higer("+str(zhang)+'), Lower('+str(die)+')')
537
- print("Last close price: "+ticker+', '+str(today_close)+', '+str(today))
538
- print("Prediction for stock price after "+str(ndays)+" trading day: "+decision)
539
- return decision,today_close,today
670
+ print(" Model voting for stock price after "+str(ndays)+" trading days: Higher("+str(zhang)+'), Lower('+str(die)+')')
671
+ print(" "+codetranslate(ticker)+': previously closed '+str(known_close)+' @ '+str(known_day))
672
+
673
+ ndate=date_adjust(known_day,adjust=ndays)
674
+ print(" Prediction to close at",decision_text,"price trend ("+decision+') @',ndate)
675
+
676
+ return decision,known_close,known_day
540
677
 
541
- if __name__=='__main__':
542
- df=price_direction_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
543
678
 
544
679
  #==============================================================================
545
680
  if __name__=='__main__':
546
- ticker='600519'
681
+ ticker='600519.SS'
547
682
  ndays=1
548
- market='sh'
549
- diff=0.03
683
+ diff=0.01
684
+ min_score=0.9
550
685
  votes=100
551
686
  max_neighbours=3
552
- max_RS=2
687
+ max_RS=20
688
+ preproctype='0-1'
689
+
690
+ df=forecast_direction_knn(ticker,ndays=1)
691
+ df=forecast_direction_knn(ticker,ndays=3)
553
692
 
554
- def forecast_direction_knn(ticker,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neighbours=10,max_RS=10,preproctype='0-1'):
693
+ def forecast_direction_knn(ticker,ndays=1,diff=0.01,min_score=0.9,votes=100,max_neighbours=10,max_RS=20,preproctype='0-1',end='latest'):
555
694
 
556
695
  """
557
696
  功能:基于个股资金流动预测未来股票涨跌方向,涨或跌
558
697
  ticker:股票代码,无后缀
559
698
  ndays:预测几天后的股价涨跌方向,默认1天
560
- market:sh-沪市,sz-深市
561
- diff:泛化精度,越小越好,默认0.03
699
+ diff:泛化精度,越小越好
700
+ min_score:最小训练/学习分数
562
701
  votes:软表决票数,默认最大100
563
702
  max_neighbours:最大邻居个数,默认10个
564
703
  max_RS:最大随机数种子,默认最大为10
@@ -566,96 +705,98 @@ def forecast_direction_knn(ticker,ndays=1,diff=0.03,min_score=0.6,votes=100,max_
566
705
  print("\nStart forecasting, it may take great time, please wait ...")
567
706
 
568
707
  #抓取个股资金净流入情况df和大盘指数情况dp
569
- df0,X,ydf=get_money_flowin(ticker)
570
- scaler_X=preproc(X,preproctype=preproctype)
571
-
572
- #测试用
573
- df=df0.copy()
708
+ df=get_money_flowin(ticker,end=end)
574
709
 
575
710
  #预测未来股价涨跌
576
711
  decisionlist=[]
577
712
  for nd in list(range(1,ndays+1)):
578
- decision,today_close,today=price_direction_knn(ticker,df,ndays=nd, \
579
- diff=diff,min_score=min_score,votes=votes,max_neighbours=max_neighbours,max_RS=max_RS)
713
+ decision,known_close,known_day=price_direction_knn(ticker,df,ndays=nd, \
714
+ diff=diff, \
715
+ min_score=min_score, \
716
+ votes=votes, \
717
+ max_neighbours=max_neighbours, \
718
+ max_RS=max_RS)
580
719
  decisionlist=decisionlist+[decision]
581
720
 
582
- print("\nStock information:",ticker,today_close,today)
583
- print("Forecasting stock prices in next",ndays,"trading days: ",end='')
584
- for i in decisionlist:
585
- print(i,'\b ',end='')
586
- print('\b.')
587
-
588
- return
721
+ print("\nStock information:",codetranslate(ticker),'\b, closed',known_close,'@',known_day)
722
+ for nd in list(range(1,ndays+1)):
723
+ ndate=date_adjust(known_day, adjust=nd)
724
+ print("Forecasting stock close price trend on",ndate,end=': ')
725
+ pred=decisionlist[nd-1]
726
+ if '-' in pred:
727
+ print("LOWER,",pred)
728
+ if '+' in pred:
729
+ print("HIGHER,",pred)
730
+ if '~' in pred:
731
+ print("market closed")
732
+ if '=' in pred:
733
+ print("least different")
734
+
735
+ return decisionlist,known_close,known_day
589
736
 
590
- if __name__=='__main__':
591
- df=forecast_direction_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
592
737
 
593
738
  #==============================================================================
594
739
  # Forecasting stock prices by money flow in/out, using knn
595
740
  #==============================================================================
596
741
 
597
742
  if __name__=='__main__':
598
- ticker='600519'
743
+ ticker='600519.SS'
744
+ ticker='605011.SS'
745
+
599
746
  ndays=1
600
- market='sh'
601
- diff=0.03
747
+ diff=0.01
748
+ min_score=0.8
602
749
  votes=100
603
750
  max_neighbours=10
604
751
  max_RS=10
752
+ printout=True
753
+
754
+ df=get_money_flowin(ticker,end='2023-6-15')
755
+ pred_result=price_price_knn(ticker,df,ndays=1)
756
+ pred_result=price_price_knn(ticker,df,ndays=3)
605
757
 
606
- def price_price_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neighbours=10,max_RS=10,printout=True):
607
-
758
+ #def price_price_knn(ticker,df,ndays=1,diff=0.01,min_score=0.8,votes=100,max_neighbours=10,max_RS=20,printout=True):
759
+ def price_price_knn(ticker,df,ndays=1,max_neighbours=10,max_RS=20,printout=True):
760
+
608
761
  """
609
762
  功能:基于个股资金流动预测次日股票价格
610
763
  ticker:股票代码,无后缀
611
764
  df:个股资金净流入信息
612
- dp:大盘信息
613
765
  ndays:预测几天后的股价涨跌方向,默认1天
614
- market:sh-沪市,sz-深市
615
- diff:泛化精度,越小越好,默认0.03
616
- votes:软表决均值,默认100
766
+ diff:弃用。泛化精度,越小越好
767
+ min_score:弃用。最小训练/测试分数,越大越好
768
+ votes:弃用。软表决均值,默认100
617
769
  max_neighbours:最大邻居个数
618
770
  max_RS:最大随机数种子
619
771
  """
620
772
  import pandas as pd
773
+
774
+ # 判断是否周末无交易免预测
775
+ known_day=df.tail(1)['date'].values[0]
776
+
777
+ tdate=date_adjust(known_day, adjust=ndays)
778
+ tdate_pd=pd.to_datetime(tdate)
779
+ tdate_wd=tdate_pd.weekday()
780
+ if tdate_wd in [5,6]:
781
+ print(" #Warning: after",ndays,"day(s) is",tdate,"(weekend), no trading")
782
+ decision=0
783
+ decision_score=0
784
+ known_close=df.tail(1)['Close'].values[0]
785
+
786
+ return decision,decision_score,known_close,known_day
787
+
788
+ #构造样本:标签ydf,特征矩阵X,预测X_new
789
+ X,ydf,X_new=make_sample(df,ndays=ndays)
621
790
 
622
- #构造标签
623
- df['nextClose']=df['Close'].shift(-ndays)
624
- df['nextChange%']=df['Change%'].shift(-ndays)
625
- df['nextDirection']=df['nextChange%'].apply(lambda x: 1 if float(x) > 0 else -1)
626
-
627
- #构造特征
628
- df['netFlowInChg_main']=df['netFlowInAmount_main']/(df['netFlowInAmount_main'].shift(ndays))
629
- df['netFlowInChg_small']=df['netFlowInAmount_small']/(df['netFlowInAmount_small'].shift(ndays))
630
- df['netFlowInChg_mid']=df['netFlowInAmount_mid']/(df['netFlowInAmount_mid'].shift(ndays))
631
- df['netFlowInChg_big']=df['netFlowInAmount_big']/(df['netFlowInAmount_big'].shift(ndays))
632
- df['netFlowInChg_super']=df['netFlowInAmount_super']/(df['netFlowInAmount_super'].shift(ndays))
633
- df['netFlowInChg']=df['netFlowInAmount']/(df['netFlowInAmount'].shift(ndays))
634
-
635
- df['dpCloseChg']=df['dpClose']/(df['dpClose'].shift(ndays))
636
- df['dpVolumeChg']=df['dpVolume']/(df['dpVolume'].shift(ndays))
637
-
638
- df2=df[['date','netFlowInChg_main',
639
- 'netFlowInChg_small','netFlowInChg_mid','netFlowInChg_big', \
640
- 'netFlowInChg_super','netFlowInChg','netFlowInRatio%_main','netFlowInRatio%_small', \
641
- 'netFlowInRatio%_mid','netFlowInRatio%_big','netFlowInRatio%_super', \
642
- 'Close','Change%','dpCloseChg','dpVolumeChg','nextClose','nextChange%','nextDirection']]
643
-
644
- #记录最新指标,用于预测次日涨跌
645
- x_last=df2.copy().tail(1)
646
- today=x_last['date'].values[0]
647
- today_close=x_last['Close'].values[0]
648
- x_last.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1,inplace=True)
649
- X_new = x_last.head(1).values
650
-
651
- #建立样本:特征序列
652
- df2.dropna(inplace=True)
653
- X=df2.drop(labels=['date','nextClose', 'nextChange%', 'nextDirection'],axis=1)
791
+ known_day_np=X_new.index.values[0]
792
+ known_day_pd=pd.to_datetime(known_day_np)
793
+ known_day=known_day_pd.strftime("%Y-%m-%d")
794
+ known_close=X_new['Close'].values[0]
654
795
 
655
796
  #建立样本:标签序列
656
797
  #y1=df2['nextDirection'] #二分类
657
798
  #y2=df2['nextChange%'] #回归
658
- y3=df2['nextClose'] #回归
799
+ y3=ydf['Close_next'] #回归
659
800
 
660
801
  #拆分训练集和测试集:y1
661
802
  from sklearn.model_selection import train_test_split
@@ -672,10 +813,11 @@ def price_price_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neig
672
813
  mlist4=['cityblock','euclidean','minkowski','cosine']
673
814
  mlist=mlist1+mlist2+mlist3+mlist4
674
815
  rslist=list(range(0,max_RS+1))
816
+
675
817
  results=pd.DataFrame(columns=('spread','train_score','test_score', \
676
818
  'neighbours','weight','metric','random','pred'))
677
819
  print('\nSearching for best parameters of knn model in',ndays,'trading days ...')
678
- print(' Progress: 0%, ',end='')
820
+ print(' Progress: 0% ',end='')
679
821
  for n in nlist:
680
822
  for w in wlist:
681
823
  for m in mlist:
@@ -683,121 +825,204 @@ def price_price_knn(ticker,df,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neig
683
825
  try:
684
826
  knn1=KNeighborsRegressor(n_neighbors=n,weights=w,metric=m,n_jobs=-1)
685
827
  X_train,X_test,y_train,y_test=train_test_split(X,y3,random_state=rs)
686
- knn1.fit(X_train, y_train)
828
+ tmp=knn1.fit(X_train, y_train)
687
829
  train_score=round(knn1.score(X_train, y_train),3)
688
830
  test_score=round(knn1.score(X_test, y_test),3)
689
831
  prediction=knn1.predict(X_new)[0]
690
832
  except:
691
- print(" #Bug: n=",n,"w=",w,"m=",m,"rs=",rs)
692
- break
833
+ #print(" #Bug parameters: n=",n,"w=",w,"m=",m,"rs=",rs)
834
+ #break
835
+ continue
693
836
  spread=abs(round(train_score-test_score,3))
694
837
 
695
838
  row=pd.Series({'spread':spread,'train_score':train_score, \
696
839
  'test_score':test_score,'neighbours':n, \
697
840
  'weight':w,'metric':m,'random':rs,'pred':prediction})
698
841
  results=results.append(row,ignore_index=True)
699
- print(int(n/n_num*100),'\b%, ',end='')
700
- print('done.')
842
+ print(int(n/n_num*100),'\b% ',end='')
843
+ print('.')
844
+
845
+ # 过滤训练/测试分数差的结果
846
+ """
847
+ r0=results[(results['train_score']>=min_score) & (results['test_score']>=min_score)]
848
+
849
+ # 如果最低分数设置太高
850
+ r0_len=len(r0)
851
+ if r0_len==0:
852
+ train_score_max=results['train_score'].max()
853
+ test_score_max=results['test_score'].max()
854
+ min_score_new=min(train_score_max,test_score_max)
855
+ r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
856
+
857
+ if len(r0)==0:
858
+ spread_quantile=results['spread'].quantile(q=0.01)
859
+ r0_spread=results[(results['spread']<spread_quantile)]
860
+ if len(r0_spread)==0:
861
+ r0_spread=results[(results['spread']<=spread_quantile)]
862
+
863
+ test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
864
+ r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
865
+ if len(r0)==0:
866
+ r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
867
+
868
+ train_score_max=r0['train_score'].max()
869
+ test_score_max=r0['test_score'].max()
870
+ min_score_new=min(train_score_max,test_score_max)
871
+
872
+ print(" #Warning: filtering parameter min_score has been adjusted to",min_score_new)
873
+ """
874
+
875
+ train_score_max=results['train_score'].max()
876
+ test_score_max=results['test_score'].max()
877
+ min_score_new=min(train_score_max,test_score_max)
878
+ r0=results[(results['train_score']>=min_score_new) & (results['test_score']>=min_score_new)]
879
+
880
+ if len(r0)==0:
881
+ spread_quantile=results['spread'].quantile(q=0.01)
882
+ r0_spread=results[(results['spread']<spread_quantile)]
883
+ if len(r0_spread)==0:
884
+ r0_spread=results[(results['spread']<=spread_quantile)]
885
+
886
+ test_score_quantile=r0_spread['test_score'].quantile(q=0.99)
887
+ r0=r0_spread[(r0_spread['test_score']>test_score_quantile)]
888
+ if len(r0)==0:
889
+ r0=r0_spread[(r0_spread['test_score']>=test_score_quantile)]
890
+
891
+ train_score_max=r0['train_score'].max()
892
+ test_score_max=r0['test_score'].max()
893
+ min_score_new=min(train_score_max,test_score_max)
701
894
 
702
- #去掉严重过拟合的结果
703
- r0=results[results['train_score'] < 1]
704
- #去掉训练集、测试集分数不过半的模型
705
- r0=r0[r0['train_score'] > min_score]
706
- r0=r0[r0['test_score'] > min_score]
707
895
  #去掉泛化效果差的结果
708
- r0=r0[r0['spread'] < diff] #限定泛化差距
709
- #优先查看泛化效果最优的结果
710
- r1=r0.sort_values(by=['spread','test_score'],ascending=[True,False])
711
- #优先查看测试分数最高的结果
712
- r2=r0.sort_values(by=['test_score','spread'],ascending=[False,True])
713
-
714
- if votes > len(r2): votes=len(r2)
715
- r2head=r2.head(votes)
896
+ """
897
+ r1=r0[r0['spread'] <= diff] #限定泛化差距
898
+ r1_len=len(r1)
899
+
900
+ if r1_len==0:
901
+ diff_new=r0['spread'].min()
902
+ r1=r0[r0['spread'] <= diff_new]
903
+ print(" #Warning: filtering parameter diff has been adjusted to",diff_new)
904
+ """
716
905
 
906
+ diff_new=r0['spread'].min()
907
+ r1=r0[r0['spread'] == diff_new]
717
908
 
718
- #加权平均股价
719
- r2head['w_pred']=r2head['pred'] * r2head['test_score']
720
- w_pred_sum=r2head['w_pred'].sum()
721
- test_score_sum=r2head['test_score'].sum()
722
- decision=round(w_pred_sum / test_score_sum,2)
723
- decision_score=round(r2head['test_score'].mean(),2)
909
+ #优先查看泛化效果最优的结果:spread最小
910
+ """
911
+ votes=min(votes,len(r1))
912
+ r2=r1.sort_values(by=['spread','test_score'],ascending=[True,False]).head(votes)
913
+ #优先查看测试分数最高的结果:test_score最大
914
+ r3=r2.sort_values(by=['test_score','spread'],ascending=[False,True])
915
+ """
916
+
917
+ r3head=r1
918
+ #平均股价
919
+ decision=round(r3head['pred'].mean(),2)
920
+ decision_score=round(r3head['test_score'].mean(),3)
724
921
 
922
+ """
923
+ r3head['w_pred']=r3head['pred'] * r3head['test_score']
924
+ w_pred_sum=r3head['w_pred'].sum()
925
+ test_score_sum=r3head['test_score'].sum()
926
+ decision=round(w_pred_sum / test_score_sum,2)
927
+ decision_score=round(r3head['test_score'].mean(),2)
928
+ """
725
929
  """
726
930
  #股价中位数:偶尔出现奇怪的错误,未找到原因
727
- decision0=r2head['pred'].median()
728
- pos=list(r2head['pred']).index(decision0)
729
- decision_score0=list(r2head['test_score'])[pos]
931
+ decision0=r3head['pred'].median()
932
+ pos=list(r3head['pred']).index(decision0)
933
+ decision_score0=list(r3head['test_score'])[pos]
730
934
  decision=round(decision0,2)
731
935
  decision_score=round(decision_score0,2)
732
936
  """
733
937
  import numpy as np
734
- if decision == np.nan: decision='?'
938
+ if np.isnan(decision): decision='?'
735
939
 
736
940
  if not printout: return decision,decision_score,today_close,today
737
941
 
738
942
  print(" Model poll for stock price after "+str(ndays)+" trading days:",decision)
739
- print("Last close price: "+ticker+', '+str(today_close)+', '+str(today))
740
- print("Prediction for stock price after "+str(ndays)+" trading day:",decision)
741
- return decision,decision_score,today_close,today
943
+ print(" Last close price: "+codetranslate(ticker)+', closed '+str(known_close)+', '+str(known_day))
944
+
945
+ ndate=date_adjust(known_day,adjust=ndays)
946
+
947
+ print(" Prediction for stock close price on",ndate,"\b:",decision)
948
+
949
+ return decision,decision_score,known_close,known_day
742
950
 
743
- if __name__=='__main__':
744
- df=get_money_flowin(ticker)
745
- df=price_price_knn('600519',df,ndays=1,max_neighbours=3,max_RS=2)
746
951
 
747
952
  #==============================================================================
748
953
  if __name__=='__main__':
749
- ticker='600519'
954
+ ticker='600519.SS'
750
955
  ndays=1
751
- market='sh'
752
- diff=0.03
753
- votes=100
754
956
  max_neighbours=3
755
957
  max_RS=2
958
+
959
+ forecast=forecast_price_knn(ticker,ndays=1,end='2023-6-15')
960
+ forecast=forecast_price_knn(ticker,ndays=3,end='2023-6-15')
756
961
 
757
- def forecast_price_knn(ticker,ndays=1,diff=0.03,min_score=0.6,votes=100,max_neighbours=10,max_RS=10):
758
-
962
+ #def forecast_price_knn(ticker,ndays=1,diff=0.03,min_score=0.7,votes=100,max_neighbours=10,max_RS=10,end='latest'):
963
+ def forecast_price_knn(ticker,ndays=1,max_neighbours=10,max_RS=20,end='latest'):
759
964
  """
760
965
  功能:基于个股资金流动预测未来股票价格
761
966
  ticker:股票代码,无后缀
762
967
  ndays:预测几天后的股价,默认1天
763
- market:sh-沪市,sz-深市
764
- diff:泛化精度,越小越好,默认0.03
765
- votes:软表决均值,默认最大100
968
+ diff:弃用。泛化精度,越小越好
969
+ min_score:弃用。最小训练/测试分数,越大越好
970
+ votes:弃用。软表决均值,默认最大100
766
971
  max_neighbours:最大邻居个数,默认10个
767
- max_RS:最大随机数种子,默认最大为10
972
+ max_RS:最大随机数种子,越大越好
973
+
974
+ 注意:结果经常相当不靠谱!
768
975
  """
769
976
  print("\nStart forecasting, it may take great time, please wait ...")
770
977
 
771
978
  #抓取个股资金净流入情况df和大盘指数情况dp
772
- df0=get_money_flowin(ticker)
773
-
774
- #测试用
775
- df=df0.copy()
979
+ df=get_money_flowin(ticker,end=end)
776
980
 
777
981
  #预测未来股价涨跌
778
982
  decisionlist=[]
779
983
  confidencelist=[]
780
984
  for nd in list(range(1,ndays+1)):
781
- decision,confidence,today_close,today=price_price_knn(ticker,df,ndays=nd, \
782
- diff=diff,min_score=min_score,votes=votes,max_neighbours=max_neighbours,max_RS=max_RS)
985
+ """
986
+ decision,confidence,known_close,known_day=price_price_knn(ticker,df,ndays=nd, \
987
+ diff=diff, \
988
+ min_score=min_score, \
989
+ votes=votes, \
990
+ max_neighbours=max_neighbours, \
991
+ max_RS=max_RS)
992
+ """
993
+ decision,confidence,known_close,known_day=price_price_knn(ticker,df,ndays=nd, \
994
+ max_neighbours=max_neighbours, \
995
+ max_RS=max_RS)
996
+
783
997
  decisionlist=decisionlist+[decision]
784
998
  confidencelist=confidencelist+[confidence]
785
999
 
786
- print("\nStock information:",ticker,today_close,today)
787
- print("Forecasting stock prices in next",ndays,"trading days: ",end='')
788
- for i in decisionlist:
789
- pos=decisionlist.index(i)
790
- conf=confidencelist[pos]
791
- if i == '?':
792
- print('?',end='')
1000
+ print("\nStock information:",codetranslate(ticker),'\b, closed',known_close,'@',known_day)
1001
+
1002
+ for nd in list(range(1,ndays+1)):
1003
+ ndate=date_adjust(known_day, adjust=nd)
1004
+ pred=decisionlist[nd-1]
1005
+ conf=confidencelist[nd-1]
1006
+
1007
+ if (pred != 0) & (conf != 0):
1008
+ if pred != '?':
1009
+ diff=decision - known_close
1010
+ if abs(diff) < 0.05:
1011
+ comment="MINOR difference with previous one"
1012
+ elif diff > 0:
1013
+ comment="HIGHER than previous one"
1014
+ else:
1015
+ comment="LOWER than previous one"
1016
+
1017
+ print("Forecasting stock prices on",ndate,'\b:',pred,'('+str(round(conf*100,1))+'% confident)')
1018
+ print("Forecasting stock prices trend on",ndate,'\b:',comment)
1019
+ else:
1020
+ print("Forecasting stock prices on",ndate,'\b: uncertain')
793
1021
  else:
794
- print(str(i)+'('+str(conf*100)+'%) ',end='')
795
- print('\b.')
1022
+ print("Forecasting stock prices on",ndate,'\b: market closed')
796
1023
 
797
- return
1024
+ return decisionlist,confidencelist
798
1025
 
799
- if __name__=='__main__':
800
- df=forecast_price_knn('600519',ndays=1,max_neighbours=5,max_RS=2)
801
1026
 
802
1027
  #==============================================================================
803
1028
  #==============================================================================