siat 3.10.125__py3-none-any.whl → 3.10.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. siat/common.py +106 -2
  2. siat/exchange_bond_china.pickle +0 -0
  3. siat/fund_china.pickle +0 -0
  4. siat/stock.py +10 -2
  5. siat/stock_info.pickle +0 -0
  6. {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/METADATA +235 -226
  7. siat-3.10.127.dist-info/RECORD +76 -0
  8. {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/WHEEL +1 -1
  9. {siat-3.10.125.dist-info → siat-3.10.127.dist-info/licenses}/LICENSE +0 -0
  10. {siat-3.10.125.dist-info → siat-3.10.127.dist-info}/top_level.txt +0 -0
  11. siat/__init__ -20240701.py +0 -65
  12. siat/__init__.py.backup_20250214.py +0 -73
  13. siat/alpha_vantage_test.py +0 -24
  14. siat/assets_liquidity_test.py +0 -44
  15. siat/barrons_scraping_test.py +0 -276
  16. siat/beta_adjustment_test.py +0 -77
  17. siat/bond_test.py +0 -142
  18. siat/capm_beta_test.py +0 -49
  19. siat/cmat_commons.py +0 -961
  20. siat/compare_cross_test.py +0 -117
  21. siat/concepts_iwencai.py +0 -86
  22. siat/concepts_kpl.py +0 -93
  23. siat/cryptocurrency_test.py +0 -71
  24. siat/derivative.py +0 -1111
  25. siat/economy-20230125.py +0 -1206
  26. siat/economy_test.py +0 -360
  27. siat/esg_test.py +0 -63
  28. siat/fama_french_test.py +0 -115
  29. siat/financial_statements_test.py +0 -31
  30. siat/financials2 - /321/205/320/231/320/277/321/206/320/254/320/274.py" +0 -341
  31. siat/financials_china2_test.py +0 -67
  32. siat/financials_china2_test2.py +0 -88
  33. siat/financials_china2_test3.py +0 -87
  34. siat/financials_china_test.py +0 -475
  35. siat/financials_china_test2.py +0 -197
  36. siat/financials_china_test2_fin_indicator.py +0 -197
  37. siat/financials_test.py +0 -713
  38. siat/fred_test.py +0 -40
  39. siat/fund_china_test.py +0 -175
  40. siat/fund_test.py +0 -40
  41. siat/future_china_test.py +0 -37
  42. siat/global_index_test.py +0 -66
  43. siat/grafix_test.py +0 -112
  44. siat/holding_risk_test.py +0 -13
  45. siat/local_debug_test.py +0 -100
  46. siat/markowitz2-20240620.py +0 -2614
  47. siat/markowitz_ccb_test.py +0 -37
  48. siat/markowitz_ef_test.py +0 -136
  49. siat/markowitz_old.py +0 -871
  50. siat/markowitz_simple-20230709.py +0 -370
  51. siat/markowitz_test.py +0 -164
  52. siat/markowitz_test2.py +0 -69
  53. siat/ml_cases_example1.py +0 -60
  54. siat/option_china_test.py +0 -447
  55. siat/option_pricing_test.py +0 -81
  56. siat/option_sina_api_test.py +0 -112
  57. siat/proxy_test.py +0 -84
  58. siat/quandl_test.py +0 -39
  59. siat/risk_adjusted_return_test.py +0 -81
  60. siat/risk_evaluation_test.py +0 -96
  61. siat/risk_free_rate_test.py +0 -127
  62. siat/sector_china_test.py +0 -203
  63. siat/security_price.py +0 -831
  64. siat/security_prices_test.py +0 -310
  65. siat/security_trend2-20240620.py +0 -493
  66. siat/setup.py +0 -41
  67. siat/shenwan index history test.py +0 -41
  68. siat/stock_china_test.py +0 -38
  69. siat/stock_info_test.py +0 -189
  70. siat/stock_list_china_test.py +0 -33
  71. siat/stock_technical-20240620.py +0 -2736
  72. siat/stock_test.py +0 -487
  73. siat/temp.py +0 -36
  74. siat/test2_graphviz.py +0 -484
  75. siat/test_graphviz.py +0 -411
  76. siat/test_markowitz_simple.py +0 -198
  77. siat/test_markowitz_simple_revised.py +0 -215
  78. siat/test_markowitz_simple_revised2.py +0 -218
  79. siat/transaction_test.py +0 -436
  80. siat/translate-20230125.py +0 -2107
  81. siat/translate-20230206.py +0 -2109
  82. siat/translate-20230215.py +0 -2158
  83. siat/translate_20240606.py +0 -4206
  84. siat/translate_241003_keep.py +0 -4300
  85. siat/universal_test.py +0 -100
  86. siat/valuation_market_china_test.py +0 -36
  87. siat-3.10.125.dist-info/RECORD +0 -152
siat/cmat_commons.py DELETED
@@ -1,961 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- 版权:王德宏,北京外国语大学国际商学院
4
- 功能:提供CMAT资本市场与投资管理分析工具包的精选公共函数,便于各个插件直接引用或复制
5
- 版本:1.10,2019-10-9
6
- """
7
-
8
- #==============================================================================
9
- #屏蔽所有警告性信息
10
- import warnings; warnings.filterwarnings('ignore')
11
- #==============================================================================
12
- #以下使用雅虎财经数据源
13
- #==============================================================================
14
def check_period(fromdate, todate):
    """Validate a date range and convert both endpoints to timestamps.

    Args:
        fromdate: Start date; anything ``pandas.to_datetime`` can parse,
            e.g. ``'YYYY-MM-DD'``.
        todate: End date, same accepted formats as ``fromdate``.

    Returns:
        ``(True, start, end)`` where ``start`` and ``end`` are the parsed
        timestamps when the period is valid; ``(None, None, None)`` after
        printing an error message when either date cannot be parsed or the
        start date is later than the end date.
    """
    import pandas as pd

    def _parse(value):
        # pandas returns None for None and NaT for empty/NaN input instead of
        # raising, so those cases are folded into the "invalid date" path.
        try:
            parsed = pd.to_datetime(value)
        except (ValueError, TypeError, OverflowError):
            return None
        if parsed is None or parsed is pd.NaT:
            return None
        return parsed

    start = _parse(fromdate)
    if start is None:
        print("Error #1(check_period): invalid date:", fromdate)
        return None, None, None

    end = _parse(todate)
    if end is None:
        print("Error #2(check_period): invalid date:", todate)
        return None, None, None

    if start > end:
        print("Error #3(check_period): invalid period: from", fromdate, "to", todate)
        return None, None, None

    return True, start, end
41
-
42
- #==============================================================================
43
def get_prices_yahoo(ticker, fromdate, todate):
    """Download raw Yahoo Finance prices through ``pandas_datareader``.

    Args:
        ticker: A ticker symbol or a list of ticker symbols. Mainland China
            codes take the suffix ``.SZ`` or ``.SS``; Hong Kong codes drop the
            leading zero and take ``.HK``.
        fromdate: First date of the sample.
        todate: Last date of the sample (today or any historical date).

    Returns:
        The downloaded price frame restricted to ``[fromdate, todate]`` and
        sorted by date ascending, or ``None`` (after printing an error) when
        the period is invalid, the download fails, or no rows come back.
    """
    # Validate the requested period before touching the network.
    result, start, end = check_period(fromdate, todate)
    if result is None:
        print("Error #1(get_prices_yahoo): incorrect date or invalid period!")
        return None

    def _print_possible_reasons():
        print("Possible reasons:")
        print(" 1)internet connection problems.")
        print(" 2)incorrect stock code.")
        print(" 3)stock delisted or suspended during the period.")

    from pandas_datareader import data
    try:
        prices = data.DataReader(ticker, 'yahoo', start, end)
    except Exception:
        # The reader raises many unrelated error types (network, parsing,
        # remote data); keep the best-effort behaviour but let
        # KeyboardInterrupt/SystemExit propagate.
        print("Error #2(get_prices_yahoo): failed to get stock prices!")
        print("Information:", ticker, fromdate, todate)
        _print_possible_reasons()
        return None

    if len(prices) == 0:
        print("Error #3(get_prices_yahoo): fetched empty stock data!")
        _print_possible_reasons()
        return None

    # Drop rows outside the requested window, then put the oldest row first.
    in_window = (prices.index >= start) & (prices.index <= end)
    return prices[in_window].sort_index(axis=0, ascending=True)
91
-
92
-
93
- #==============================================================================
94
def cvt_yftickerlist(ticker):
    """Convert a pandas_datareader style ticker argument to yfinance style.

    Args:
        ticker: A single ticker symbol, or a list/tuple of ticker symbols.

    Returns:
        A ``(tickers, is_list)`` pair. For a single symbol, or a sequence
        holding exactly one symbol, ``tickers`` is that symbol and ``is_list``
        is ``False``. For two or more symbols ``tickers`` is one
        space-separated string and ``is_list`` is ``True``.

    Raises:
        ValueError: If ``ticker`` is an empty list or tuple.
    """
    # Anything that is not a sequence of symbols is passed through untouched.
    if not isinstance(ticker, (list, tuple)):
        return ticker, False

    if len(ticker) == 0:
        raise ValueError("cvt_yftickerlist: empty ticker list")

    if len(ticker) == 1:
        return ticker[0], False

    return ' '.join(ticker), True


if __name__ == '__main__':
    tl1, islist = cvt_yftickerlist('AAPL')
    tl1, islist = cvt_yftickerlist(['AAPL'])
    tl1, islist = cvt_yftickerlist(['AAPL', 'MSFT'])
    tl1, islist = cvt_yftickerlist(['AAPL', 'MSFT', '0700.hk'])
    print(tl1)
121
-
122
- #==============================================================================
123
def get_prices_yf(ticker, start, end):
    """Download Yahoo Finance prices through ``yfinance``.

    Args:
        ticker: A ticker symbol or a list of ticker symbols. Mainland China
            codes take the suffix ``.SZ`` or ``.SS``; Hong Kong codes drop the
            leading zero and take ``.HK``.
        start: First date of the sample, ``yyyy-mm-dd``.
        end: Last date of the sample (today or any historical date).

    Returns:
        The price frame restricted to ``[start, end]`` and sorted by date
        ascending (most recent row last), or ``None`` after printing an error
        when the period is invalid, the request fails, or no rows come back.
    """
    # Convert and validate the dates.
    r, startdate, enddate = check_period(start, end)
    if r is None:
        print("Error #1(get_prices_yf): invalid time period")
        return None

    import yfinance as yf
    try:
        ticker1, islist = cvt_yftickerlist(ticker)
        if not islist:
            # Single symbol: use the Ticker history API.
            p = yf.Ticker(ticker1).history(start=start, end=end)
        else:
            # Several symbols: one batched download.
            p = yf.download(ticker1, start=start, end=end, progress=False)
    except Exception:
        print("Error #1(get_prices_yf): server not responsed!")
        return None

    if len(p) == 0:
        print("Error #2(get_prices_yf): server reached but returned no data!")
        return None

    # NOTE(review): recent yfinance releases appear to return a tz-aware
    # index; comparing that with the naive bounds from check_period raises
    # TypeError, so the timezone is stripped first. Confirm against the
    # yfinance version in use.
    if getattr(p.index, 'tz', None) is not None:
        p = p.copy()
        p.index = p.index.tz_localize(None)

    # Keep only rows inside the requested window, oldest first.
    in_window = (p.index >= startdate) & (p.index <= enddate)
    return p[in_window].sort_index(axis=0, ascending=True)


if __name__ == '__main__':
    df1 = get_prices_yf('AAPL', '2019-10-1', '2019-10-8')
    df2 = get_prices_yf(['AAPL'], '2019-10-1', '2019-10-8')
    df3 = get_prices_yf(['AAPL', 'MSFT'], '2019-10-1', '2019-10-8')
    df4 = get_prices_yf(['AAPL', 'MSFT', 'IBM'], '2019-10-1', '2019-10-8')
184
-
185
- #==============================================================================
186
def get_stock_prices(ticker, fromdate, todate):
    """Fetch Yahoo Finance prices and tag them with code, date and weekday.

    Args:
        ticker: A ticker symbol (or list of symbols). Mainland China codes
            take the suffix ``.SZ`` or ``.SS``; Hong Kong codes drop the
            leading zero and take ``.HK``.
        fromdate: First date of the sample.
        todate: Last date of the sample (today or any historical date).

    Returns:
        A frame sorted by date ascending with the columns ``Stock``, ``Date``
        (``YYYY-MM-DD`` text), ``Weekday`` (Monday = 1), ``Open``, ``Close``,
        ``Adj Close`` and ``Volume``. For a single ticker an ``Adjustment``
        column marks rows where ``Close`` differs from ``Adj Close``. Returns
        ``None`` after printing an error when the download fails.
    """
    prices = get_prices_yahoo(ticker, fromdate, todate)
    if prices is None:
        print("Error #2(get_stock_prices): failed to get stock prices!")
        return None

    # Expose the date and the weekday as ordinary columns.
    prices['Date'] = prices.index.strftime("%Y-%m-%d")
    prices['Weekday'] = prices.index.weekday + 1

    # Decide list-vs-single from the argument type instead of relying on a
    # column assignment happening to raise.
    stocklist = isinstance(ticker, (list, tuple))
    prices['Stock'] = str(ticker) if stocklist else ticker

    columns = ['Stock', 'Date', 'Weekday', 'Open', 'Close', 'Adj Close', 'Volume']
    if not stocklist:
        # Flag rows whose close was adjusted for splits/dividends.
        prices['Adjustment'] = prices['Close'] != prices['Adj Close']
        columns.append('Adjustment')

    return prices[columns]


if __name__ == '__main__':
    df1 = get_stock_prices('601857.SS', '2012-01-01', '2019-12-31')
    df2 = get_stock_prices('MSFT', '01/01/2015', '06/30/2019')
    # The Date column is formatted as YYYY-MM-DD, so filters must use it too.
    print(df2[df2.Date == '2019-06-28'])
    print(df2[(df2.Date >= '2019-03-20') & (df2.Date <= '2019-03-29')])
    df3 = get_stock_prices('^GSPC', '1/1/2015', '6/30/2019')
    df4 = get_stock_prices('002504.SZ', '01/01/2015', '06/30/2019')
    df5 = get_stock_prices('000001.SS', '01/01/2015', '07/16/2019')
    df6 = get_stock_prices('0700.HK', '01/01/2015', '06/30/2019')
244
-
245
- #==============================================================================
246
def get_portfolio_prices(tickerlist, sharelist, fromdate, todate):
    """Compute the daily value of a portfolio from Yahoo Finance prices.

    Args:
        tickerlist: List of ticker symbols.
        sharelist: Number of shares held for each ticker, in the same order
            as ``tickerlist``.
        fromdate: First date of the sample, ``'YYYY-MM-DD'``.
        todate: Last date of the sample (today or any historical date).

    Returns:
        A frame sorted by date ascending with the columns ``Portfolio``,
        ``Shares``, ``Date``, ``Weekday`` (Monday = 1), ``Open``, ``Close``,
        ``Adj Close`` and ``Adjustment``; ``None`` after printing an error
        when the two lists differ in length or the download fails.
    """
    # Every ticker needs exactly one share count.
    if len(tickerlist) != len(sharelist):
        print("Error #1(get_portfolio_prices): numbers of stocks and shares mismatch.")
        return None

    p = get_prices_yahoo(tickerlist, fromdate, todate)
    if p is None:
        # Previously a failed download crashed below with a TypeError.
        print("Error #2(get_portfolio_prices): failed to get stock prices!")
        return None

    import pandas as pd
    # Portfolio value per price field = per-ticker prices weighted by shares.
    # NOTE(review): assumes the per-ticker columns come back in tickerlist
    # order, as the original dot product did - confirm against the reader.
    prices = pd.DataFrame({
        field: p[field].dot(sharelist)
        for field in ('Open', 'Close', 'Adj Close')
    })

    # Expose the date and the weekday as ordinary columns.
    prices['Date'] = prices.index.strftime("%Y-%m-%d")
    prices['Weekday'] = prices.index.weekday + 1

    prices['Portfolio'] = str(tickerlist)
    prices['Shares'] = str(sharelist)
    prices['Adjustment'] = prices['Close'] != prices['Adj Close']

    return prices[['Portfolio', 'Shares', 'Date', 'Weekday',
                   'Open', 'Close', 'Adj Close', 'Adjustment']]
309
-
310
-
311
- #==============================================================================
312
- #以下专门处理tushare数据源
313
- #==============================================================================
314
def convert_date_ts(y4m2d2):
    """Convert a date to the compact ``YYYYMMDD`` text used by tushare.

    Args:
        y4m2d2: A date such as ``'YYYY-MM-DD'``; anything
            ``pandas.to_datetime`` can parse is accepted.

    Returns:
        The date as a ``YYYYMMDD`` string, or ``None`` after printing an
        error when the input cannot be interpreted as a date.
    """
    import pandas as pd

    try:
        date1 = pd.to_datetime(y4m2d2)
    except (ValueError, TypeError, OverflowError):
        date1 = None

    # pandas maps None to None and empty/NaN input to NaT without raising;
    # neither can be formatted, so both count as invalid.
    if date1 is None or date1 is pd.NaT:
        print("Error #1(convert_date_ts): invalid date:", y4m2d2)
        return None

    return date1.strftime('%Y%m%d')


if __name__ == '__main__':
    print(convert_date_ts("2019/11/1"))
331
-
332
- #==============================================================================
333
def init_ts():
    """Create a tushare pro API client.

    The token is read from the ``TUSHARE_TOKEN`` environment variable when it
    is set and non-empty; otherwise the legacy built-in token is used so that
    existing callers keep working.

    Returns:
        The client object returned by ``tushare.pro_api``.
    """
    import os
    import tushare as ts

    # NOTE(review): SECURITY - this credential is committed in source and
    # shipped in a public package. It should be rotated and the fallback
    # removed once all users supply TUSHARE_TOKEN.
    legacy_token = '49f134b05e668d288be43264639ac77821ab9938ff40d6013c0ed24f'
    token = os.environ.get('TUSHARE_TOKEN') or legacy_token

    return ts.pro_api(token)
343
- #==============================================================================
344
def get_stock_prices_ts(ticker, fromdate, todate):
    """Fetch daily prices of one mainland China stock or index from tushare.

    Unlike the Yahoo Finance helpers this handles a single code only, not a
    list or a portfolio.

    Args:
        ticker: Stock or index code with the suffix ``.SZ`` or ``.SH``. The
            Yahoo style ``.SS`` suffix is accepted and converted to ``.SH``.
        fromdate: First date of the sample.
        todate: Last date of the sample (today or any historical date).

    Returns:
        A frame indexed by date (ascending) with the columns ``Stock``,
        ``Date`` (``YYYY-MM-DD`` text), ``Weekday`` (Monday = 1), ``Open``,
        ``Close`` and ``Volume`` (in shares); ``None`` after printing an
        error when the period is invalid or no data can be fetched.
    """
    # Validate the period; keep the timestamps for the final window filter.
    result, start, end = check_period(fromdate, todate)
    if result is None:
        print("Error #1(get_stock_prices_ts): invalid date period!")
        return None

    # tushare expects YYYYMMDD text and .SH (not Yahoo's .SS) for Shanghai.
    ts_start = convert_date_ts(fromdate)
    ts_end = convert_date_ts(todate)
    ticker2 = ticker.upper().replace('.SS', '.SH')

    pro = init_ts()
    try:
        prices = pro.daily(ts_code=ticker2, start_date=ts_start, end_date=ts_end)
    except Exception:
        print("Error #2(get_stock_prices_ts): failed to get stock prices!")
        print("Information:", ticker2, fromdate, todate)
        print("Possible reasons:")
        print(" 1)internet connection problems.")
        print(" 2)incorrect stock code.")
        print(" 3)stock delisted or suspended during the period.")
        return None

    # No error but no rows either: the code may denote an index.
    if len(prices) == 0:
        try:
            prices = pro.index_daily(ts_code=ticker2, start_date=ts_start,
                                     end_date=ts_end)
        except Exception:
            print("Error #3(get_stock_prices_ts): failed to get index prices!")
            print("Information:", ticker2, fromdate, todate)
            print("Possible reasons:")
            print(" 1)internet connection problems.")
            print(" 2)data source server busy.")
            return None
        if len(prices) == 0:
            print("Error #4(get_stock_prices_ts): fetched empty index data!")
            print("Information:", ticker2, fromdate, todate)
            return None

    import pandas as pd
    # Rename columns to the Yahoo Finance conventions used elsewhere.
    prices = prices.rename(columns={'ts_code': 'Stock', 'open': 'Open',
                                    'high': 'High', 'low': 'Low',
                                    'close': 'Close', 'amount': 'Amount'})

    # trade_date arrives as YYYYMMDD text; derive the display date and index.
    trade_dates = pd.to_datetime(prices['trade_date'], format='%Y%m%d')
    prices['Date'] = trade_dates.dt.strftime('%Y-%m-%d')
    # Volume is reported in lots; one lot is 100 shares.
    prices['Volume'] = prices['vol'] * 100

    prices['DateIndex'] = trade_dates
    prices = prices.set_index('DateIndex')
    prices['Weekday'] = prices.index.weekday + 1

    # Oldest row first, restricted to the requested window.
    prices = prices.sort_index(axis=0, ascending=True)
    prices = prices[(prices.index >= start) & (prices.index <= end)]

    return prices[['Stock', 'Date', 'Weekday', 'Open', 'Close', 'Volume']]
435
-
436
-
437
- #==============================================================================
438
def get_portfolio_prices_ts(tickerlist, sharelist, fromdate, todate):
    """Compute the daily value of a mainland China portfolio from tushare.

    Args:
        tickerlist: List of mainland China stock codes.
        sharelist: Number of shares held for each code, in the same order as
            ``tickerlist``.
        fromdate: First date of the sample, ``'YYYY-MM-DD'``.
        todate: Last date of the sample (today or any historical date).

    Returns:
        A frame sorted by date ascending with the columns ``Portfolio``,
        ``Shares``, ``Date``, ``Weekday`` (Monday = 1), ``Open`` and
        ``Close``, covering only dates on which every constituent traded;
        ``None`` after printing an error when the inputs are inconsistent or
        any constituent cannot be fetched.
    """
    # Every ticker needs exactly one share count.
    if len(tickerlist) != len(sharelist):
        print("Error #1(get_portfolio_prices_ts): numbers of stocks and shares mismatch.")
        return None
    if len(tickerlist) == 0:
        print("Error #3(get_portfolio_prices_ts): empty portfolio.")
        return None

    import pandas as pd
    # Collect one open and one close series per constituent. Lists (rather
    # than dicts keyed by ticker) keep duplicated tickers aligned with
    # sharelist.
    open_cols = []
    close_cols = []
    for t in tickerlist:
        p = get_stock_prices_ts(t, fromdate, todate)
        if p is None:
            print("Error #2(get_portfolio_prices_ts): fetch stock prices failed.")
            print("Information:", t, fromdate, todate)
            return None
        open_cols.append(p['Open'].rename(t))
        close_cols.append(p['Close'].rename(t))

    # Inner join keeps only the dates common to all constituents.
    p_open = pd.concat(open_cols, axis=1, join='inner')
    p_close = pd.concat(close_cols, axis=1, join='inner')

    # Portfolio value = per-ticker price weighted by the number of shares.
    prices = pd.DataFrame({'Open': p_open.dot(sharelist),
                           'Close': p_close.dot(sharelist)})

    # Expose the date and the weekday as ordinary columns.
    prices['Date'] = prices.index.strftime("%Y-%m-%d")
    prices['Weekday'] = prices.index.weekday + 1

    prices['Portfolio'] = str(tickerlist)
    prices['Shares'] = str(sharelist)

    return prices[['Portfolio', 'Shares', 'Date', 'Weekday', 'Open', 'Close']]
515
-
516
- #==============================================================================
517
- #以下专门处理fama_french因子数据源
518
- #==============================================================================
519
# (scope, factor, freq, dataset symbol, table number inside the dataset).
# Table 0 is used for monthly/weekly/daily data, table 1 for yearly data.
_FF_DATASETS = (
    ('US', 'FF3', 'monthly', 'F-F_Research_Data_Factors', 0),
    ('US', 'FF3', 'yearly', 'F-F_Research_Data_Factors', 1),
    ('US', 'FF3', 'weekly', 'F-F_Research_Data_Factors_weekly', 0),
    ('US', 'FF3', 'daily', 'F-F_Research_Data_Factors_daily', 0),
    ('US', 'FF5', 'monthly', 'F-F_Research_Data_5_Factors_2x3', 0),
    ('US', 'FF5', 'yearly', 'F-F_Research_Data_5_Factors_2x3', 1),
    ('US', 'FF5', 'daily', 'F-F_Research_Data_5_Factors_2x3_daily', 0),
    ('US', 'Mom', 'monthly', 'F-F_Momentum_Factor', 0),
    ('US', 'Mom', 'yearly', 'F-F_Momentum_Factor', 1),
    ('US', 'Mom', 'daily', 'F-F_Momentum_Factor_daily', 0),
    ('US', 'ST_Rev', 'monthly', 'F-F_ST_Reversal_Factor', 0),
    ('US', 'ST_Rev', 'yearly', 'F-F_ST_Reversal_Factor', 1),
    ('US', 'ST_Rev', 'daily', 'F-F_ST_Reversal_Factor_daily', 0),
    ('US', 'LT_Rev', 'monthly', 'F-F_LT_Reversal_Factor', 0),
    ('US', 'LT_Rev', 'yearly', 'F-F_LT_Reversal_Factor', 1),
    ('US', 'LT_Rev', 'daily', 'F-F_LT_Reversal_Factor_daily', 0),
    ('Global', 'FF3', 'monthly', 'Global_3_Factors', 0),
    ('Global', 'FF3', 'yearly', 'Global_3_Factors', 1),
    ('Global', 'FF3', 'daily', 'Global_3_Factors_Daily', 0),
    ('Global_ex_US', 'FF3', 'monthly', 'Global_ex_US_3_Factors', 0),
    ('Global_ex_US', 'FF3', 'yearly', 'Global_ex_US_3_Factors', 1),
    ('Global_ex_US', 'FF3', 'daily', 'Global_ex_US_3_Factors_Daily', 0),
    ('Europe', 'FF3', 'monthly', 'Europe_3_Factors', 0),
    ('Europe', 'FF3', 'yearly', 'Europe_3_Factors', 1),
    ('Europe', 'FF3', 'daily', 'Europe_3_Factors_Daily', 0),
    ('Japan', 'FF3', 'monthly', 'Japan_3_Factors', 0),
    ('Japan', 'FF3', 'yearly', 'Japan_3_Factors', 1),
    ('Japan', 'FF3', 'daily', 'Japan_3_Factors_Daily', 0),
    ('Asia_Pacific_ex_Japan', 'FF3', 'monthly', 'Asia_Pacific_ex_Japan_3_Factors', 0),
    ('Asia_Pacific_ex_Japan', 'FF3', 'yearly', 'Asia_Pacific_ex_Japan_3_Factors', 1),
    ('Asia_Pacific_ex_Japan', 'FF3', 'daily', 'Asia_Pacific_ex_Japan_3_Factors_Daily', 0),
    ('North_America', 'FF3', 'monthly', 'North_America_3_Factors', 0),
    ('North_America', 'FF3', 'yearly', 'North_America_3_Factors', 1),
    ('North_America', 'FF3', 'daily', 'North_America_3_Factors_Daily', 0),
    ('Global', 'FF5', 'monthly', 'Global_5_Factors', 0),
    ('Global', 'FF5', 'yearly', 'Global_5_Factors', 1),
    ('Global', 'FF5', 'daily', 'Global_5_Factors_Daily', 0),
    ('Global_ex_US', 'FF5', 'monthly', 'Global_ex_US_5_Factors', 0),
    ('Global_ex_US', 'FF5', 'yearly', 'Global_ex_US_5_Factors', 1),
    ('Global_ex_US', 'FF5', 'daily', 'Global_ex_US_5_Factors_Daily', 0),
    ('Europe', 'FF5', 'monthly', 'Europe_5_Factors', 0),
    ('Europe', 'FF5', 'yearly', 'Europe_5_Factors', 1),
    ('Europe', 'FF5', 'daily', 'Europe_5_Factors_Daily', 0),
    ('Japan', 'FF5', 'monthly', 'Japan_5_Factors', 0),
    ('Japan', 'FF5', 'yearly', 'Japan_5_Factors', 1),
    ('Japan', 'FF5', 'daily', 'Japan_5_Factors_Daily', 0),
    ('Asia_Pacific_ex_Japan', 'FF5', 'monthly', 'Asia_Pacific_ex_Japan_5_Factors', 0),
    ('Asia_Pacific_ex_Japan', 'FF5', 'yearly', 'Asia_Pacific_ex_Japan_5_Factors', 1),
    ('Asia_Pacific_ex_Japan', 'FF5', 'daily', 'Asia_Pacific_ex_Japan_5_Factors_Daily', 0),
    ('North_America', 'FF5', 'monthly', 'North_America_5_Factors', 0),
    ('North_America', 'FF5', 'yearly', 'North_America_5_Factors', 1),
    ('North_America', 'FF5', 'daily', 'North_America_5_Factors_Daily', 0),
    ('Global', 'Mom', 'monthly', 'Global_Mom_Factor', 0),
    ('Global', 'Mom', 'yearly', 'Global_Mom_Factor', 1),
    ('Global', 'Mom', 'daily', 'Global_Mom_Factor_Daily', 0),
    ('Global_ex_US', 'Mom', 'monthly', 'Global_ex_US_Mom_Factor', 0),
    ('Global_ex_US', 'Mom', 'yearly', 'Global_ex_US_Mom_Factor', 1),
    ('Global_ex_US', 'Mom', 'daily', 'Global_ex_US_Mom_Factor_Daily', 0),
    ('Europe', 'Mom', 'monthly', 'Europe_Mom_Factor', 0),
    ('Europe', 'Mom', 'yearly', 'Europe_Mom_Factor', 1),
    ('Europe', 'Mom', 'daily', 'Europe_Mom_Factor_Daily', 0),
    ('Japan', 'Mom', 'monthly', 'Japan_Mom_Factor', 0),
    ('Japan', 'Mom', 'yearly', 'Japan_Mom_Factor', 1),
    ('Japan', 'Mom', 'daily', 'Japan_Mom_Factor_Daily', 0),
    ('Asia_Pacific_ex_Japan', 'Mom', 'monthly', 'Asia_Pacific_ex_Japan_MOM_Factor', 0),
    ('Asia_Pacific_ex_Japan', 'Mom', 'yearly', 'Asia_Pacific_ex_Japan_MOM_Factor', 1),
    ('Asia_Pacific_ex_Japan', 'Mom', 'daily', 'Asia_Pacific_ex_Japan_MOM_Factor_Daily', 0),
    ('North_America', 'Mom', 'monthly', 'North_America_Mom_Factor', 0),
    ('North_America', 'Mom', 'yearly', 'North_America_Mom_Factor', 1),
    ('North_America', 'Mom', 'daily', 'North_America_Mom_Factor_Daily', 0),
)


def get_ff_factors(start, end, scope='US', factor='FF3', freq='daily'):
    """Download Fama-French style factor data via ``pandas_datareader``.

    Args:
        start: First date of the sample.
        end: Last date of the sample.
        scope: Market, e.g. ``'US'``, ``'Global'``, ``'Europe'``, ``'Japan'``.
            ``'China'`` is mapped to ``'Asia_Pacific_ex_Japan'``.
        factor: Factor set: ``'FF3'``, ``'FF5'``, ``'Mom'``, and for the US
            also ``'ST_Rev'`` and ``'LT_Rev'``.
        freq: ``'daily'``, ``'monthly'`` or ``'yearly'`` (``'weekly'`` only
            for US FF3).

    Returns:
        The selected factor table, or ``None`` after printing an error when
        the combination is not supported, the download fails, or the table
        is empty.
    """
    source = 'famafrench'
    if scope == "China":
        scope = "Asia_Pacific_ex_Japan"

    # Find the dataset matching scope + factor + freq.
    match = next(
        ((symbol, seq) for s, f, q, symbol, seq in _FF_DATASETS
         if (s, f, q) == (scope, factor, freq)),
        None,
    )
    if match is None:
        print("Error #1(get_ff_factors): No data item available for", scope, factor, freq)
        return None
    symbol, seq = match

    import pandas_datareader.data as web
    try:
        ds = web.DataReader(symbol, source, start, end)
    except Exception:
        print("Error #2(get_ff_factors): Server did not respond")
        return None

    # The reader returns several tables keyed by number; pick the wanted one.
    factor_df = ds[seq]
    if len(factor_df) == 0:
        print("Error #3(get_ff_factors): Server returned empty data for",
              start, end, scope, factor, freq)
        return None

    return factor_df


if __name__ == '__main__':
    ff3_daily = get_ff_factors('2019-05-01', '2019-06-30', 'US', 'FF3', 'daily')
633
-
634
-
635
- #==============================================================================
636
- #以下为线性回归函数
637
- #==============================================================================
638
def check_reg_sample(X, y):
    """Check whether a regression sample is usable.

    Every problem found is printed; checking continues after the first
    problem so that all of them are reported in one pass.

    Args:
        X: Explanatory variable(s): a pandas Series/DataFrame or any
            array-like that ``pandas.DataFrame`` can wrap.
        y: Dependent variable, same accepted kinds as ``X``.

    Returns:
        ``True`` when both samples are non-empty, contain no missing values
        and have the same number of observations; ``False`` otherwise.
    """
    import pandas as pd

    def _rows_with_missing(data):
        # Number of observations holding at least one missing value. This
        # equals len(data) - len(data.dropna()) for pandas objects and also
        # works for lists and numpy arrays, which have no .dropna().
        return int(pd.DataFrame(data).isna().any(axis=1).sum())

    result = True

    # Empty samples.
    if len(X) == 0:
        print("Error #1(check_reg_sample): no obs for independent variable(s)")
        print("Independent variable(s):", X)
        result = False
    if len(y) == 0:
        print("Error #2(check_reg_sample): no obs for dependent variable")
        print("Dependent variable:", y)
        result = False

    # Missing values.
    missing_x = _rows_with_missing(X)
    if missing_x != 0:
        print("Error #3(check_reg_sample): missing value(s) in independent variable(s)")
        print("Missing value(s) in independent variable(s):", missing_x)
        result = False
    missing_y = _rows_with_missing(y)
    if missing_y != 0:
        print("Error #4(check_reg_sample): missing value(s) in dependent variable")
        print("Missing value(s) in dependent variable:", missing_y)
        result = False

    # Both samples must have the same number of observations.
    if len(X) != len(y):
        print("Error #5(check_reg_sample): sample numbers of independent/dependent variables not match")
        print("Obs of X and y respectively:", len(X), len(y))
        result = False

    return result
677
-
678
- #==============================================================================
679
def linreg(X, y):
    """Run a simple one-variable linear regression ``y = a + b*X``.

    Args:
        X: Explanatory variable, a one-dimensional sequence without missing
            values.
        y: Dependent variable, a one-dimensional sequence without missing
            values.

    Returns:
        ``(beta, alpha, r_sqr, p_value, std_err)``: the slope, the intercept,
        the squared correlation coefficient, the p-value and the standard
        error reported by ``scipy.stats.linregress``. All five are ``None``
        when the sample fails ``check_reg_sample``.
    """
    if not check_reg_sample(X, y):
        print("Error #1(linreg): invalid sample for regression")
        return None, None, None, None, None

    from scipy import stats
    fit = stats.linregress(X, y)

    return fit.slope, fit.intercept, fit.rvalue ** 2, fit.pvalue, fit.stderr
708
-
709
-
710
- #==============================================================================
711
def sigstars(p_value):
    """Translate a p-value into significance stars.

    Args:
        p_value: The p-value of a coefficient.

    Returns:
        ``"***"`` below 0.01, ``"** "`` below 0.05, ``"* "`` below 0.1, and
        ``" "`` otherwise. A NaN p-value is treated as not significant
        (previously it fell through every branch and returned ``None``).
    """
    # NOTE(review): the blank and one-star labels may have lost padding
    # spaces when this text was extracted - confirm the intended width.
    if p_value < 0.01:
        return "***"
    if p_value < 0.05:
        return "** "
    if p_value < 0.1:
        return "* "
    # Reached for p >= 0.1 and for NaN, which fails every comparison above.
    return " "
727
-
728
- #==============================================================================
729
def regparms(results):
    """Tabulate the coefficients of a fitted regression.

    Args:
        results: A fitted regression result exposing ``params``, ``tvalues``
            and ``pvalues``.

    Returns:
        A frame with one row per coefficient and the columns ``coef``,
        ``t_values``, ``p_values`` and ``sig`` (significance stars).
    """
    import pandas as pd

    def _as_column(values, name):
        # Wrap one result attribute into a single-column frame.
        frame = pd.DataFrame(values)
        frame.columns = [name]
        return frame

    coef_df = _as_column(results.params, 'coef')
    tval_df = _as_column(results.tvalues, 't_values')
    pval_df = _as_column(results.pvalues, 'p_values')

    # Attach the significance stars next to the p-values.
    pval_df['sig'] = pval_df['p_values'].apply(sigstars)

    # Join the three pieces on the coefficient names.
    combined = coef_df.merge(tval_df, how='inner',
                             left_index=True, right_index=True)
    combined = combined.merge(pval_df, how='inner',
                              left_index=True, right_index=True)
    return combined
760
- #==============================================================================
761
def smreg(X, y):
    """Run a multiple linear regression ``y = a + b1*x1 + b2*x2 + ...``.

    Args:
        X: Explanatory variables (e.g. a DataFrame) without missing values.
        y: Dependent variable, a one-dimensional sequence without missing
            values.

    Returns:
        The coefficient table produced by ``regparms`` for an OLS fit with an
        added intercept.
        NOTE(review): when the sample fails ``check_reg_sample`` a tuple of
        five ``None`` values is returned instead, so the two return shapes
        differ - callers must handle both.
    """
    if check_reg_sample(X, y):
        import statsmodels.api as sm
        # Add the intercept column, fit by OLS and tabulate the result.
        fitted = sm.OLS(y, sm.add_constant(X)).fit()
        return regparms(fitted)

    print("Error #1(smreg): invalid sample for regression")
    return None, None, None, None, None
793
- #==============================================================================
794
- #以下为不涉及股票数据源的公共工具函数
795
- #==============================================================================
796
def draw_lines(df, y_label, x_label, axhline_value, axhline_label, title_txt,
               data_label=True):
    """Plot every column of ``df`` as a line chart and show it.

    Args:
        df: Data frame; one line is drawn per column and the index supplies
            the x-axis positions.
        y_label: Label of the y axis.
        x_label: Label of the x axis; an empty string leaves it unlabeled.
        axhline_value: y position of the horizontal reference line.
        axhline_label: Legend label of the horizontal reference line; an
            empty string suppresses the line.
        title_txt: Chart title; use ``\\n`` to split it over several lines.
        data_label: Whether to print each point's value (rounded to two
            decimals) just above the point.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    for column in df.columns.values.tolist():
        plt.plot(df[column], label=column, lw=3)
        if data_label:
            # Annotate every point slightly above the line.
            for x_pos, value in zip(df.index, df[column]):
                plt.text(x_pos, value + 0.02, str(round(value, 2)),
                         ha='center', va='bottom', fontsize=7)

    # Horizontal reference line.
    if axhline_label != "":
        plt.axhline(y=axhline_value, label=axhline_label,
                    color='green', linestyle=':')

    # Axis labels.
    plt.ylabel(y_label, fontweight='bold')
    if x_label != "":
        plt.xlabel(x_label, fontweight='bold')

    # Title, tick orientation and legend.
    plt.title(title_txt, fontweight='bold')
    plt.xticks(rotation=45)
    plt.legend(loc='best')
    plt.show()

    return


if __name__ == '__main__':
    import pandas as pd
    title_txt = "Stock Risk \nCAPM Beta Trends"
    # The demo previously used an undefined df and passed the label and
    # reference-line arguments in the wrong positions.
    df = pd.DataFrame({'beta': [0.9, 1.1, 1.05]},
                      index=['2017', '2018', '2019'])
    draw_lines(df, "Beta coefficient", "", 1.0, "market line", title_txt)
845
-
846
-
847
-
848
- #==============================================================================
849
def save_to_excel(df, filedir, excelfile, sheetname):
    """Save ``df`` into a sheet of an Excel workbook.

    * If the folder does not exist, an error is printed and nothing is saved.
    * If the workbook does not exist, it is created with ``df`` in
      ``sheetname``.
    * If the workbook exists without ``sheetname``, the sheet is added and
      the other sheets are left untouched.
    * If both exist, ``df`` is appended below the rows already in the sheet
      (only the columns of ``df`` are kept).

    Args:
        df: Data frame to save.
        filedir: Folder holding the workbook.
        excelfile: Workbook file name without folder.
        sheetname: Name of the target sheet.

    Returns:
        None.

    Note:
        Numbers stored as text in ``df`` are converted to numeric cells by
        Excel, so data read back may differ in type from ``df``.
        NOTE(review): updating an existing workbook uses the append mode of
        ``pandas.ExcelWriter``, which needs the openpyxl engine (``.xlsx``)
        and a pandas version providing ``if_sheet_exists`` - confirm for the
        supported environments. The function no longer changes the process
        working directory as a side effect of the folder check.
    """
    import os
    import pandas as pd

    # The folder must exist; a non-path argument counts as missing too.
    try:
        folder_ok = os.path.isdir(filedir)
    except TypeError:
        folder_ok = False
    if not folder_ok:
        print("Error #1(save_to_excel): folder does not exist")
        print("Information:", filedir)
        return

    # Full workbook name including the folder.
    filename = filedir + '/' + excelfile

    if not os.path.isfile(filename):
        # New workbook: write df directly.
        df.to_excel(filename, sheet_name=sheetname, header=True)
        print("***Results saved in", filename, "@ sheet", sheetname)
        return

    # Existing workbook: append to the sheet when it is already there.
    with pd.ExcelFile(filename) as book:
        if sheetname in book.sheet_names:
            existing = pd.read_excel(book, sheet_name=sheetname)[df.columns]
            combined = pd.concat([existing, df], axis=0, ignore_index=True)
        else:
            combined = df

    try:
        # Append mode rewrites only the target sheet; the others stay as is.
        with pd.ExcelWriter(filename, mode='a',
                            if_sheet_exists='replace') as writer:
            combined.to_excel(writer, sheet_name=sheetname,
                              header=True, index=True)
    except OSError:
        print("Error #2(save_to_excel): writing file permission denied")
        print("Information:", filename)
        return

    print("***Results saved in", filename, "@ sheet", sheetname)
    return
933
- #==============================================================================
934
def gen_yearlist(start_year, end_year):
    """Build the list of years from ``start_year`` to ``end_year`` inclusive.

    Args:
        start_year: First year, as a string or integer.
        end_year: Last year, as a string or integer.

    Returns:
        A list of year strings in ascending order; empty when ``start_year``
        is later than ``end_year``.
    """
    start = int(start_year)
    end = int(end_year)

    # Integer range avoids the float round trip of linspace and the
    # ValueError it raised for reversed ranges.
    return [str(year) for year in range(start, end + 1)]
960
- #==============================================================================
961
-